Thu Jan 3 00:36:00 PST 2013

.gitignore                             |    8 
 MAINTAINERS                            |    5 
 Makefile                               |   15 -
 Makefile.objs                          |   67 ----
 Makefile.target                        |    8 
 arch_init.c                            |  245 ++++++++---------
 block-migration.c                      |   49 ---
 block/raw-posix.c                      |   34 ++
 block/raw-win32.c                      |    4 
 block/sheepdog.c                       |   11 
 buffered_file.c                        |  268 -------------------
 buffered_file.h                        |   22 -
 configure                              |   67 ++++
 cutils.c                               |    6 
 disas/Makefile.objs                    |    4 
 dump.c                                 |    8 
 exec.c                                 |  128 ++++++---
 hw/Makefile.objs                       |    3 
 hw/alpha_dp264.c                       |   18 -
 hw/alpha_sys.h                         |    2 
 hw/alpha_typhoon.c                     |   30 +-
 hw/dataplane/Makefile.objs             |    3 
 hw/dataplane/event-poll.c              |  100 +++++++
 hw/dataplane/event-poll.h              |   40 ++
 hw/dataplane/hostmem.c                 |  176 ++++++++++++
 hw/dataplane/hostmem.h                 |   57 ++++
 hw/dataplane/ioq.c                     |  117 ++++++++
 hw/dataplane/ioq.h                     |   57 ++++
 hw/dataplane/virtio-blk.c              |  465 +++++++++++++++++++++++++++++++++
 hw/dataplane/virtio-blk.h              |   29 ++
 hw/dataplane/vring.c                   |  362 +++++++++++++++++++++++++
 hw/dataplane/vring.h                   |   62 ++++
 hw/i2c.h                               |    3 
 hw/kvm/apic.c                          |   10 
 hw/kvm/clock.c                         |    2 
 hw/kvm/pci-assign.c                    |   14 
 hw/pci/msix.c                          |   19 +
 hw/pci/msix.h                          |    6 
 hw/pci/pci.h                           |    4 
 hw/pci/pcie.c                          |    2 
 hw/pflash_cfi01.c                      |    3 
 hw/ppc.c                               |  128 +++++----
 hw/ppc.h                               |    4 
 hw/ppc/e500.c                          |    2 
 hw/ppc405_uc.c                         |   10 
 hw/ppc440_bamboo.c                     |    2 
 hw/ppc4xx.h                            |    6 
 hw/ppc4xx_devs.c                       |    8 
 hw/ppc_booke.c                         |   44 +--
 hw/qdev-properties-system.c            |  358 +++++++++++++++++++++++++
 hw/qdev-properties.c                   |  356 +------------------------
 hw/qdev-properties.h                   |    1 
 hw/qdev.c                              |   13 
 hw/s390-virtio-bus.c                   |   36 +-
 hw/spapr.c                             |    2 
 hw/tmp105.c                            |   17 -
 hw/tmp105.h                            |   67 ++++
 hw/usb/redirect.c                      |    1 
 hw/vfio_pci.c                          |    2 
 hw/virtex_ml507.c                      |    2 
 hw/virtio-blk.c                        |   53 +++
 hw/virtio-blk.h                        |    5 
 hw/virtio-pci.c                        |  116 +++++---
 hw/virtio.c                            |    2 
 hw/virtio.h                            |   26 -
 include/block/block.h                  |    9 
 include/exec/cpu-all.h                 |   15 -
 include/exec/cpu-common.h              |   12 
 include/exec/cpu-defs.h                |    8 
 include/exec/memory.h                  |   16 +
 include/migration/migration.h          |   13 
 include/migration/qemu-file.h          |    5 
 include/migration/vmstate.h            |    1 
 include/qemu-common.h                  |    3 
 include/qemu/iov.h                     |   13 
 include/qemu/thread-posix.h            |    2 
 include/qom/cpu.h                      |   11 
 include/sysemu/kvm.h                   |   32 +-
 include/sysemu/sysemu.h                |    1 
 iov.c                                  |   90 +++++-
 kvm-all.c                              |  127 +++++----
 memory.c                               |   16 +
 memory_mapping.c                       |    4 
 migration-exec.c                       |    3 
 migration-fd.c                         |    4 
 migration-tcp.c                        |    3 
 migration-unix.c                       |    3 
 migration.c                            |  390 ++++++++++++++++++++-------
 net/tap-win32.c                        |    1 
 net/vde.c                              |    1 
 pc-bios/acpi-dsdt.aml                  |binary
 pc-bios/bios.bin                       |binary
 pc-bios/q35-acpi-dsdt.aml              |binary
 qemu-char.c                            |   12 
 qemu-img.c                             |   10 
 qemu-thread-posix.c                    |   11 
 qemu-thread-win32.c                    |   24 -
 roms/seabios                           |    2 
 savevm.c                               |   64 +---
 scripts/tracetool/backend/dtrace.py    |    2 
 scripts/tracetool/format/h.py          |    6 
 target-alpha/cpu-qom.h                 |    3 
 target-alpha/cpu.c                     |  214 +++++++++++++++
 target-alpha/cpu.h                     |   18 +
 target-alpha/sys_helper.c              |    6 
 target-alpha/translate.c               |   58 ----
 target-arm/helper.c                    |    9 
 target-i386/arch_dump.c                |    2 
 target-i386/cpu.c                      |   13 
 target-i386/cpu.h                      |    2 
 target-i386/kvm.c                      |  254 ++++++++++--------
 target-i386/machine.c                  |   21 +
 target-m68k/helper.c                   |    9 
 target-mips/dsp_helper.c               |   55 +--
 target-mips/op_helper.c                |    7 
 target-mips/translate.c                |    3 
 target-ppc/kvm.c                       |  124 +++++---
 target-ppc/kvm_ppc.h                   |    8 
 target-s390x/cpu.h                     |   12 
 target-s390x/interrupt.c               |    3 
 target-s390x/kvm.c                     |  176 ++++++------
 target-s390x/misc_helper.c             |    2 
 target-unicore32/helper.c              |    2 
 target-xtensa/translate.c              |    6 
 tcg/hppa/tcg-target.c                  |    2 
 tcg/i386/tcg-target.c                  |   31 ++
 tcg/i386/tcg-target.h                  |    5 
 tcg/tcg-op.h                           |    2 
 tcg/tcg.c                              |    1 
 tcg/tcg.h                              |    3 
 tests/tcg/mips/mips32-dsp/extr_r_w.c   |   23 +
 tests/tcg/mips/mips32-dsp/extr_rs_w.c  |   23 +
 tests/tcg/mips/mips32-dsp/extr_s_h.c   |   23 +
 tests/tcg/mips/mips32-dsp/extr_w.c     |   23 +
 tests/tcg/mips/mips32-dsp/extrv_r_w.c  |   25 +
 tests/tcg/mips/mips32-dsp/extrv_rs_w.c |   25 +
 tests/tcg/mips/mips32-dsp/extrv_s_h.c  |   17 +
 tests/tcg/mips/mips32-dsp/extrv_w.c    |   26 +
 tests/tcg/mips/mips32-dsp/rddsp.c      |   32 --
 tests/tcg/mips/mips32-dsp/wrdsp.c      |   32 --
 tests/test-iov.c                       |  150 ++++++++++
 trace-events                           |    9 
 trace.h                                |    6 
 trace/Makefile.objs                    |   70 ++++
 translate-all.c                        |   12 
 ui/Makefile.objs                       |    1 
 vl.c                                   |   28 +
 xen-all.c                              |    1 
 148 files changed, 4432 insertions(+), 1853 deletions(-)

New commits:
commit dbd99ae302be8f51b547fb6283c91d0c9859b7d5
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Jan 1 18:33:44 2013 +0100

    configure: Write new file "config-all-disas.mak" when running configure
    
    Incremental builds added new lines to that file each time when configure
    was run.
    
    Now a new file with a comment line is written.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/configure b/configure
index cc1e20a..9538041 100755
--- a/configure
+++ b/configure
@@ -3314,6 +3314,8 @@ fi
 config_host_mak="config-host.mak"
 config_host_ld="config-host.ld"
 
+echo "# Automatically generated by configure - do not modify" >config-all-disas.mak
+
 echo "# Automatically generated by configure - do not modify" > $config_host_mak
 printf "# Configured with:" >> $config_host_mak
 printf " '%s'" "$0" "$@" >> $config_host_mak
commit 503483336039a8b2b182535f87f4820d259fca82
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Jan 1 18:43:56 2013 +0100

    tci: Fix broken builds with TCG interpreter
    
    TCI no longer compiled after commit 76cad71136b7eb371cf2a2a4e1621cfe8d9c769a.
    
    The TCI disassembler depends on data structures which are different for
    each QEMU target, so it cannot be compiled as a universal-obj today.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/Makefile.target b/Makefile.target
index be8b8b8..5bfa4960 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -69,13 +69,12 @@ all: $(PROGS) stap
 obj-y = exec.o translate-all.o cpu-exec.o
 obj-y += tcg/tcg.o tcg/optimize.o
 obj-$(CONFIG_TCG_INTERPRETER) += tci.o
+obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
 obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
 obj-$(CONFIG_GDBSTUB_XML) += gdbstub-xml.o
 
-tci-dis.o: QEMU_CFLAGS += -I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/tci
-
 #########################################################
 # Linux user emulator target
 
diff --git a/disas/Makefile.objs b/disas/Makefile.objs
index 9134429..3f5c5b9 100644
--- a/disas/Makefile.objs
+++ b/disas/Makefile.objs
@@ -13,4 +13,6 @@ universal-obj-$(CONFIG_SH4_DIS) += sh4.o
 universal-obj-$(CONFIG_SPARC_DIS) += sparc.o
 universal-obj-$(CONFIG_LM32_DIS) += lm32.o
 
-universal-obj-$(CONFIG_TCI_DIS) += tci.o
+# TODO: As long as the TCG interpreter and its generated code depend
+# on the QEMU target, we cannot compile the disassembler here.
+#universal-obj-$(CONFIG_TCI_DIS) += tci.o
commit 74e91370beb3fabda515623b4491a8b7a024304a
Author: Michael Tokarev <mjt at tls.msk.ru>
Date:   Mon Dec 31 15:30:31 2012 +0400

    savevm.c: cleanup system includes
    
    savevm.c suffers from the same problem as some other files.
    Some years ago savevm.c was created from vl.c, moving some
    code from there into a separate file.  At that time, all
    includes were just copied from vl.c to savevm.c, without
    checking which ones are needed and which are not.
    
    But actually most of that stuff is _not_ needed.  More, some
    stuff is wrong, for example, *BSD #ifdef'ery around <util.h>
    vs <libutil.h> - for one, it fails to build on Debian/kFreebsd.
    
    Just remove all this.  Maybe there's a possibility to clean
    it up further - like removing <windows.h> (and maybe including
    winsock.h for htons etc), and maybe it's possible to remove
    some internal #includes too, but I didn't check this.
    
    While at it, remove duplicate #include of qemu/timer.h.
    
    Signed-off-by: Michael Tokarev <mjt at tls.msk.ru>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/savevm.c b/savevm.c
index bcdb92e..529d60e 100644
--- a/savevm.c
+++ b/savevm.c
@@ -21,52 +21,15 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <unistd.h>
-#include <fcntl.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <zlib.h>
-
-/* Needed early for CONFIG_BSD etc. */
+
 #include "config-host.h"
 
 #ifndef _WIN32
-#include <sys/times.h>
-#include <sys/wait.h>
-#include <termios.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <net/if.h>
 #include <arpa/inet.h>
-#include <dirent.h>
-#include <netdb.h>
-#include <sys/select.h>
-#ifdef CONFIG_BSD
-#include <sys/stat.h>
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
-#include <libutil.h>
-#else
-#include <util.h>
-#endif
-#ifdef __linux__
-#include <pty.h>
-#include <malloc.h>
-#include <linux/rtc.h>
-#endif
-#endif
 #endif
 
 #ifdef _WIN32
 #include <windows.h>
-#include <malloc.h>
-#include <sys/timeb.h>
-#include <mmsystem.h>
-#define getopt_long_only getopt_long
-#define memalign(align, size) malloc(size)
 #endif
 
 #include "qemu-common.h"
@@ -80,7 +43,6 @@
 #include "migration/migration.h"
 #include "qemu/sockets.h"
 #include "qemu/queue.h"
-#include "qemu/timer.h"
 #include "sysemu/cpus.h"
 #include "exec/memory.h"
 #include "qmp-commands.h"
commit ab51b1d568e02c80b1abf9016bda3a86dc1db389
Author: Michael Tokarev <mjt at tls.msk.ru>
Date:   Sun Dec 30 12:48:14 2012 +0400

    disallow -daemonize usage of stdio (curses display, -nographic, -serial stdio etc)
    
    Curses display requires stdin/out to stay on the terminal,
    so -daemonize makes no sense in this case.  Instead of
    leaving display uninitialized like is done since 995ee2bf469de6bb,
    explicitly detect this case earlier and error out.
    
    -nographic can actually be used with -daemonize, by redirecting
    everything to a null device, but the problem is that according
    to documentation and historical behaviour, -nographic redirects
    guest ports to stdin/out, which, again, makes no sense in case
    of -daemonize.  Since -nographic is a legacy option, don't bother
    fixing this case (to allow -nographic and -daemonize by redirecting
    guest ports to null instead of stdin/out in this case), but disallow
    it completely instead, to stop garbling host terminal.
    
    If no display is needed and the user wants to use -nographic,
    the right way to go is to use
      -serial null -parallel null -monitor none -display none -vga none
    instead of -nographic.
    
    Also prevent the same issue -- it was possible to get garbled
    host tty after
    
      -nographic -daemonize
    
    and it is still possible to have it by using
    
      -serial stdio -daemonize
    
    Fix this by disallowing opening stdio chardev when -daemonize
    is specified.
    
    Signed-off-by: Michael Tokarev <mjt at tls.msk.ru>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qemu-char.c b/qemu-char.c
index c6382a9..331ad5c 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -772,6 +772,10 @@ static CharDriverState *qemu_chr_open_stdio(QemuOpts *opts)
     if (stdio_nb_clients >= STDIO_MAX_CLIENTS) {
         return NULL;
     }
+    if (is_daemonized()) {
+        error_report("cannot use stdio with -daemonize");
+        return NULL;
+    }
     if (stdio_nb_clients == 0) {
         old_fd0_flags = fcntl(0, F_GETFL);
         tcgetattr (0, &oldtty);
diff --git a/vl.c b/vl.c
index e6a8d89..f056c95 100644
--- a/vl.c
+++ b/vl.c
@@ -3637,6 +3637,30 @@ int main(int argc, char **argv, char **envp)
         default_sdcard = 0;
     }
 
+    if (is_daemonized()) {
+        /* According to documentation and historically, -nographic redirects
+         * serial port, parallel port and monitor to stdio, which does not work
+         * with -daemonize.  We can redirect these to null instead, but since
+         * -nographic is legacy, let's just error out.
+         * We disallow -nographic only if all other ports are not redirected
+         * explicitly, to not break existing legacy setups which uses
+         * -nographic _and_ redirects all ports explicitly - this is valid
+         * usage, -nographic is just a no-op in this case.
+         */
+        if (display_type == DT_NOGRAPHIC
+            && (default_parallel || default_serial
+                || default_monitor || default_virtcon)) {
+            fprintf(stderr, "-nographic can not be used with -daemonize\n");
+            exit(1);
+        }
+#ifdef CONFIG_CURSES
+        if (display_type == DT_CURSES) {
+            fprintf(stderr, "curses display can not be used with -daemonize\n");
+            exit(1);
+        }
+#endif
+    }
+
     if (display_type == DT_NOGRAPHIC) {
         if (default_parallel)
             add_device_config(DEV_PARALLEL, "null");
@@ -3903,9 +3927,7 @@ int main(int argc, char **argv, char **envp)
         break;
 #if defined(CONFIG_CURSES)
     case DT_CURSES:
-        if (!is_daemonized()) {
-            curses_display_init(ds, full_screen);
-        }
+        curses_display_init(ds, full_screen);
         break;
 #endif
 #if defined(CONFIG_SDL)
commit 217da7fdeb2a4c99c49f22f9dc64c8df2e3a4387
Merge: 9a8a5ae d6b1ef8
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 2 12:19:27 2013 -0600

    Merge remote-tracking branch 'stefanha/block' into staging
    
    * stefanha/block:
      sheepdog: pass oid directly to send_pending_req()
      sheepdog: don't update inode when create_and_write fails
      block/raw-win32: Fix compiler warnings (wrong format specifiers)
      qemu-img: report size overflow error message
      cutils: change strtosz_suffix_unit function
      virtio-blk: Return UNSUPP for unknown request types
      virtio-blk: add x-data-plane=on|off performance feature
      dataplane: add virtio-blk data plane code
      virtio-blk: restore VirtIOBlkConf->config_wce flag
      iov: add qemu_iovec_concat_iov()
      test-iov: add iov_discard_front/back() testcases
      iov: add iov_discard_front/back() to remove data
      dataplane: add Linux AIO request queue
      dataplane: add event loop
      dataplane: add virtqueue vring code
      dataplane: add host memory mapping code
      configure: add CONFIG_VIRTIO_BLK_DATA_PLANE
      raw-posix: add raw_get_aio_fd() for virtio-blk-data-plane
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 9a8a5ae69d3a436e51a7eb2edafe254572f60823
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sun Dec 30 08:20:13 2012 +0100

    tcg: Remove unneeded assertion
    
    Commit 7f6f0ae5b95adfa76e10eabe2c34424a955fd10c added two assertions.
    
    One of these assertions is not needed:
    The pointer ts is never NULL because it is initialized with the
    address of an array element.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index ede51a3..9275e37 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -800,7 +800,6 @@ static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
 
     assert(idx >= 0 && idx < s->nb_temps);
     ts = &s->temps[idx];
-    assert(ts);
     if (idx < s->nb_globals) {
         pstrcpy(buf, buf_size, ts->name);
     } else {
commit d6b1ef89a1ede41334e4d0fa27e600e0b4d4f209
Author: Liu Yuan <tailai.ly at taobao.com>
Date:   Mon Dec 17 14:17:27 2012 +0800

    sheepdog: pass oid directly to send_pending_req()
    
    Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
    Cc: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
    Reviewed-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/sheepdog.c b/block/sheepdog.c
index b9186fb..e821746 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -724,7 +724,7 @@ static void coroutine_fn aio_read_response(void *opaque)
              * create requests are not allowed, so we search the
              * pending requests here.
              */
-            send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx));
+            send_pending_req(s, aio_req->oid);
         }
         break;
     case AIOCB_READ_UDATA:
commit bd751f2204a03d6fcd47a4b4b12ac534d2ecbea7
Author: Liu Yuan <tailai.ly at taobao.com>
Date:   Mon Dec 17 14:17:26 2012 +0800

    sheepdog: don't update inode when create_and_write fails
    
    For the error case such as SD_RES_NO_SPACE, we shouldn't update the inode bitmap
    to avoid the scenario that the object is allocated but wasn't created at the
    server side. This will result in VM's IO error on the failed object.
    
    Cc: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
    Cc: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Liu Yuan <tailai.ly at taobao.com>
    Reviewed-by: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 13dc023..b9186fb 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -714,10 +714,11 @@ static void coroutine_fn aio_read_response(void *opaque)
              * and max_dirty_data_idx are changed to include updated
              * index between them.
              */
-            s->inode.data_vdi_id[idx] = s->inode.vdi_id;
-            s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
-            s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
-
+            if (rsp.result == SD_RES_SUCCESS) {
+                s->inode.data_vdi_id[idx] = s->inode.vdi_id;
+                s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
+                s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+            }
             /*
              * Some requests may be blocked because simultaneous
              * create requests are not allowed, so we search the
commit fccedc624c425e3acb1557f9f9b13104427ec5ce
Author: Stefan Weil <sw at weilnetz.de>
Date:   Mon Dec 17 20:40:01 2012 +0100

    block/raw-win32: Fix compiler warnings (wrong format specifiers)
    
    Commit fbcad04d6bfdff937536eb23088a01a280a1a3af added fprintf statements
    with wrong format specifiers.
    
    GetLastError() returns a DWORD which is unsigned long, so %lu must be used.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-win32.c b/block/raw-win32.c
index f58334b..b89ac19 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -314,11 +314,11 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
      */
     dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
     if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-        fprintf(stderr, "SetFilePointer error: %d\n", GetLastError());
+        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
         return -EIO;
     }
     if (SetEndOfFile(s->hfile) == 0) {
-        fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError());
+        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
         return -EIO;
     }
     return 0;
commit 7944339726b4582b67fd94085c21c33636e8f973
Author: liguang <lig.fnst at cn.fujitsu.com>
Date:   Mon Dec 17 09:49:23 2012 +0800

    qemu-img: report size overflow error message
    
    qemu-img complains when the requested qcow or qcow2
    image size overflows 64 bits; report an accurate
    error message in that case.
    
    $./qemu-img create -f qcow2 /tmp/foo 0x10000000000000000
    before change:
    qemu-img: Invalid image size specified! You may use k, M, G or T suffixes for
    qemu-img: kilobytes, megabytes, gigabytes and terabytes.
    
    after change:
    qemu-img: Image size must be less than 8 EiB!
    
    [Resolved conflict with a9300911 goto removal -- Stefan]
    
    Signed-off-by: liguang <lig.fnst at cn.fujitsu.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index 69cc028..85d3740 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -348,9 +348,13 @@ static int img_create(int argc, char **argv)
         char *end;
         sval = strtosz_suffix(argv[optind++], &end, STRTOSZ_DEFSUFFIX_B);
         if (sval < 0 || *end) {
-            error_report("Invalid image size specified! You may use k, M, G or "
-                  "T suffixes for ");
-            error_report("kilobytes, megabytes, gigabytes and terabytes.");
+            if (sval == -ERANGE) {
+                error_report("Image size must be less than 8 EiB!");
+            } else {
+                error_report("Invalid image size specified! You may use k, M, "
+                      "G or T suffixes for ");
+                error_report("kilobytes, megabytes, gigabytes and terabytes.");
+            }
             return 1;
         }
         img_size = (uint64_t)sval;
commit 37edbf7ea8067262a5c3d8bbe4786139348c8311
Author: liguang <lig.fnst at cn.fujitsu.com>
Date:   Mon Dec 17 09:49:22 2012 +0800

    cutils: change strtosz_suffix_unit function
    
    If the value to be converted is larger than INT64_MAX,
    the caller has no convenient way to detect the overflow,
    so return a distinct error code (-ERANGE) for that case.
    
    Signed-off-by: liguang <lig.fnst at cn.fujitsu.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/cutils.c b/cutils.c
index d06590b..80bb1dc 100644
--- a/cutils.c
+++ b/cutils.c
@@ -214,12 +214,13 @@ static int64_t suffix_mul(char suffix, int64_t unit)
 /*
  * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
  * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
- * in *end, if not NULL. Return -1 on error.
+ * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on
+ * other error.
  */
 int64_t strtosz_suffix_unit(const char *nptr, char **end,
                             const char default_suffix, int64_t unit)
 {
-    int64_t retval = -1;
+    int64_t retval = -EINVAL;
     char *endptr;
     unsigned char c;
     int mul_required = 0;
@@ -246,6 +247,7 @@ int64_t strtosz_suffix_unit(const char *nptr, char **end,
         goto fail;
     }
     if ((val * mul >= INT64_MAX) || val < 0) {
+        retval = -ERANGE;
         goto fail;
     }
     retval = val * mul;
commit 9e72c45033770b81b536ac6091e91807247cc25a
Author: Alexey Zaytsev <alexey.zaytsev at gmail.com>
Date:   Thu Dec 13 09:03:43 2012 +0200

    virtio-blk: Return UNSUPP for unknown request types
    
    Currently, all unknown requests are treated as VIRTIO_BLK_T_IN
    
    Signed-off-by: Alexey Zaytsev <alexey.zaytsev at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 92c745a..df57b35 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -398,10 +398,14 @@ static void virtio_blk_handle_request(VirtIOBlockReq *req,
         qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
                                  req->elem.out_num - 1);
         virtio_blk_handle_write(req, mrb);
-    } else {
+    } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
+        /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
         qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
                                  req->elem.in_num - 1);
         virtio_blk_handle_read(req);
+    } else {
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+        g_free(req);
     }
 }
 
commit 392808b49b6aee066d0c1d200e72fc3dc11c9d0f
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:45:38 2012 +0100

    virtio-blk: add x-data-plane=on|off performance feature
    
    The virtio-blk-data-plane feature is easy to integrate into
    hw/virtio-blk.c.  The data plane can be started and stopped similar to
    vhost-net.
    
    Users can take advantage of the virtio-blk-data-plane feature using the
    new -device virtio-blk-pci,x-data-plane=on property.
    
    The x-data-plane name was chosen because at this stage the feature is
    experimental and likely to see changes in the future.
    
    If the VM configuration does not support virtio-blk-data-plane an error
    message is printed.  Although we could fall back to regular virtio-blk,
    I prefer the explicit approach since it prompts the user to fix their
    configuration if they want the performance benefit of
    virtio-blk-data-plane.
    
    Limitations:
     * Only format=raw is supported
     * Live migration is not supported
     * Block jobs, hot unplug, and other operations fail with -EBUSY
     * I/O throttling limits are ignored
     * Only Linux hosts are supported due to Linux AIO usage
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index f004148..92c745a 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -17,6 +17,9 @@
 #include "hw/block-common.h"
 #include "sysemu/blockdev.h"
 #include "virtio-blk.h"
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+#include "hw/dataplane/virtio-blk.h"
+#endif
 #include "scsi-defs.h"
 #ifdef __linux__
 # include <scsi/sg.h>
@@ -33,6 +36,9 @@ typedef struct VirtIOBlock
     VirtIOBlkConf *blk;
     unsigned short sector_mask;
     DeviceState *qdev;
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    VirtIOBlockDataPlane *dataplane;
+#endif
 } VirtIOBlock;
 
 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -407,6 +413,16 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
         .num_writes = 0,
     };
 
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+     * dataplane here instead of waiting for .set_status().
+     */
+    if (s->dataplane) {
+        virtio_blk_data_plane_start(s->dataplane);
+        return;
+    }
+#endif
+
     while ((req = virtio_blk_get_request(s))) {
         virtio_blk_handle_request(req, &mrb);
     }
@@ -446,8 +462,9 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
 {
     VirtIOBlock *s = opaque;
 
-    if (!running)
+    if (!running) {
         return;
+    }
 
     if (!s->bh) {
         s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
@@ -457,6 +474,14 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
 
 static void virtio_blk_reset(VirtIODevice *vdev)
 {
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    VirtIOBlock *s = to_virtio_blk(vdev);
+
+    if (s->dataplane) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+#endif
+
     /*
      * This should cancel pending requests, but can't do nicely until there
      * are per-device request lists.
@@ -541,6 +566,12 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
     VirtIOBlock *s = to_virtio_blk(vdev);
     uint32_t features;
 
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    if (s->dataplane && !(status & VIRTIO_CONFIG_S_DRIVER)) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+#endif
+
     if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
         return;
     }
@@ -638,6 +669,12 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
     s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
 
     s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    if (!virtio_blk_data_plane_create(&s->vdev, blk, &s->dataplane)) {
+        virtio_cleanup(&s->vdev);
+        return NULL;
+    }
+#endif
 
     qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     s->qdev = dev;
@@ -655,6 +692,11 @@ VirtIODevice *virtio_blk_init(DeviceState *dev, VirtIOBlkConf *blk)
 void virtio_blk_exit(VirtIODevice *vdev)
 {
     VirtIOBlock *s = to_virtio_blk(vdev);
+
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    virtio_blk_data_plane_destroy(s->dataplane);
+    s->dataplane = NULL;
+#endif
     unregister_savevm(s->qdev, "virtio-blk", s);
     blockdev_mark_auto_del(s->bs);
     virtio_cleanup(vdev);
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 3cab783..82761cf 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -896,6 +896,9 @@ static Property virtio_blk_properties[] = {
 #endif
     DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+    DEFINE_PROP_BIT("x-data-plane", VirtIOPCIProxy, blk.data_plane, 0, false),
+#endif
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
     DEFINE_PROP_END_OF_LIST(),
commit e72f66a0a20f38d0c7576f6c0aec0ca644976e35
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:39:30 2012 +0100

    dataplane: add virtio-blk data plane code
    
    virtio-blk-data-plane is a subset implementation of virtio-blk.  It only
    handles read, write, and flush requests.  It does this using a dedicated
    thread that executes an epoll(2)-based event loop and processes I/O
    using Linux AIO.
    
    This approach performs very well but can be used for raw image files
    only.  The number of IOPS achieved has been reported to be several times
    higher than the existing virtio-blk implementation.
    
    Eventually it should be possible to unify virtio-blk-data-plane with the
    main body of QEMU code once the block layer and hardware emulation are
    able to run outside the global mutex.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
index abd408f..682aa9e 100644
--- a/hw/dataplane/Makefile.objs
+++ b/hw/dataplane/Makefile.objs
@@ -1,3 +1,3 @@
 ifeq ($(CONFIG_VIRTIO), y)
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o virtio-blk.o
 endif
diff --git a/hw/dataplane/virtio-blk.c b/hw/dataplane/virtio-blk.c
new file mode 100644
index 0000000..4c4ad84
--- /dev/null
+++ b/hw/dataplane/virtio-blk.c
@@ -0,0 +1,465 @@
+/*
+ * Dedicated thread for virtio-blk I/O processing
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu/iov.h"
+#include "event-poll.h"
+#include "qemu/thread.h"
+#include "vring.h"
+#include "ioq.h"
+#include "migration/migration.h"
+#include "hw/virtio-blk.h"
+#include "hw/dataplane/virtio-blk.h"
+
+enum {
+    SEG_MAX = 126,                  /* maximum number of I/O segments */
+    VRING_MAX = SEG_MAX + 2,        /* maximum number of vring descriptors */
+    REQ_MAX = VRING_MAX,            /* maximum number of requests in the vring,
+                                     * is VRING_MAX / 2 with traditional and
+                                     * VRING_MAX with indirect descriptors */
+};
+
+typedef struct {
+    struct iocb iocb;               /* Linux AIO control block */
+    QEMUIOVector *inhdr;            /* iovecs for virtio_blk_inhdr */
+    unsigned int head;              /* vring descriptor index */
+} VirtIOBlockRequest;
+
+struct VirtIOBlockDataPlane {
+    bool started;
+    QEMUBH *start_bh;
+    QemuThread thread;
+
+    VirtIOBlkConf *blk;
+    int fd;                         /* image file descriptor */
+
+    VirtIODevice *vdev;
+    Vring vring;                    /* virtqueue vring */
+    EventNotifier *guest_notifier;  /* irq */
+
+    EventPoll event_poll;           /* event poller */
+    EventHandler io_handler;        /* Linux AIO completion handler */
+    EventHandler notify_handler;    /* virtqueue notify handler */
+
+    IOQueue ioqueue;                /* Linux AIO queue (should really be per
+                                       dataplane thread) */
+    VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
+                                             queue */
+
+    unsigned int num_reqs;
+
+    Error *migration_blocker;
+};
+
+/* Raise an interrupt to signal guest, if necessary */
+static void notify_guest(VirtIOBlockDataPlane *s)
+{
+    if (!vring_should_notify(s->vdev, &s->vring)) {
+        return;
+    }
+
+    event_notifier_set(s->guest_notifier);
+}
+
+static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+    struct virtio_blk_inhdr hdr;
+    int len;
+
+    if (likely(ret >= 0)) {
+        hdr.status = VIRTIO_BLK_S_OK;
+        len = ret;
+    } else {
+        hdr.status = VIRTIO_BLK_S_IOERR;
+        len = 0;
+    }
+
+    trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
+
+    qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
+    qemu_iovec_destroy(req->inhdr);
+    g_slice_free(QEMUIOVector, req->inhdr);
+
+    /* According to the virtio specification len should be the number of bytes
+     * written to the buffer, but for virtio-blk it seems to be the number of
+     * bytes transferred plus the status bytes.
+     */
+    vring_push(&s->vring, req->head, len + sizeof(hdr));
+
+    s->num_reqs--;
+}
+
+static void complete_request_early(VirtIOBlockDataPlane *s, unsigned int head,
+                                   QEMUIOVector *inhdr, unsigned char status)
+{
+    struct virtio_blk_inhdr hdr = {
+        .status = status,
+    };
+
+    qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
+    qemu_iovec_destroy(inhdr);
+    g_slice_free(QEMUIOVector, inhdr);
+
+    vring_push(&s->vring, head, sizeof(hdr));
+    notify_guest(s);
+}
+
+/* Get disk serial number */
+static void do_get_id_cmd(VirtIOBlockDataPlane *s,
+                          struct iovec *iov, unsigned int iov_cnt,
+                          unsigned int head, QEMUIOVector *inhdr)
+{
+    char id[VIRTIO_BLK_ID_BYTES];
+
+    /* Serial number not NUL-terminated when shorter than buffer */
+    strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
+    iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
+    complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+}
+
+static int process_request(IOQueue *ioq, struct iovec iov[],
+                           unsigned int out_num, unsigned int in_num,
+                           unsigned int head)
+{
+    VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
+    struct iovec *in_iov = &iov[out_num];
+    struct virtio_blk_outhdr outhdr;
+    QEMUIOVector *inhdr;
+    size_t in_size;
+    struct iocb *iocb;
+
+    /* Copy in outhdr */
+    if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
+                            sizeof(outhdr)) != sizeof(outhdr))) {
+        error_report("virtio-blk request outhdr too short");
+        return -EFAULT;
+    }
+    iov_discard_front(&iov, &out_num, sizeof(outhdr));
+
+    /* Grab inhdr for later */
+    in_size = iov_size(in_iov, in_num);
+    if (in_size < sizeof(struct virtio_blk_inhdr)) {
+        error_report("virtio_blk request inhdr too short");
+        return -EFAULT;
+    }
+    inhdr = g_slice_new(QEMUIOVector);
+    qemu_iovec_init(inhdr, 1);
+    qemu_iovec_concat_iov(inhdr, in_iov, in_num,
+            in_size - sizeof(struct virtio_blk_inhdr),
+            sizeof(struct virtio_blk_inhdr));
+    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
+
+    /* TODO Linux sets the barrier bit even when not advertised! */
+    outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
+
+    switch (outhdr.type) {
+    case VIRTIO_BLK_T_IN:
+        iocb = ioq_rdwr(ioq, true, in_iov, in_num, outhdr.sector * 512);
+        break;
+
+    case VIRTIO_BLK_T_OUT:
+        iocb = ioq_rdwr(ioq, false, iov, out_num, outhdr.sector * 512);
+        break;
+
+    case VIRTIO_BLK_T_SCSI_CMD:
+        /* TODO support SCSI commands */
+        complete_request_early(s, head, inhdr, VIRTIO_BLK_S_UNSUPP);
+        return 0;
+
+    case VIRTIO_BLK_T_FLUSH:
+        /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
+        if (qemu_fdatasync(s->fd) < 0) {
+            complete_request_early(s, head, inhdr, VIRTIO_BLK_S_IOERR);
+        } else {
+            complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+        }
+        return 0;
+
+    case VIRTIO_BLK_T_GET_ID:
+        do_get_id_cmd(s, in_iov, in_num, head, inhdr);
+        return 0;
+
+    default:
+        error_report("virtio-blk unsupported request type %#x", outhdr.type);
+        qemu_iovec_destroy(inhdr);
+        g_slice_free(QEMUIOVector, inhdr);
+        return -EFAULT;
+    }
+
+    /* Fill in virtio block metadata needed for completion */
+    VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+    req->head = head;
+    req->inhdr = inhdr;
+    return 0;
+}
+
+static void handle_notify(EventHandler *handler)
+{
+    VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
+                                           notify_handler);
+
+    /* There is one array of iovecs into which all new requests are extracted
+     * from the vring.  Requests are read from the vring and the translated
+     * descriptors are written to the iovecs array.  The iovecs do not have to
+     * persist across handle_notify() calls because the kernel copies the
+     * iovecs on io_submit().
+     *
+     * Handling io_submit() EAGAIN may require storing the requests across
+     * handle_notify() calls until the kernel has sufficient resources to
+     * accept more I/O.  This is not implemented yet.
+     */
+    struct iovec iovec[VRING_MAX];
+    struct iovec *end = &iovec[VRING_MAX];
+    struct iovec *iov = iovec;
+
+    /* When a request is read from the vring, the index of the first descriptor
+     * (aka head) is returned so that the completed request can be pushed onto
+     * the vring later.
+     *
+     * The number of hypervisor read-only iovecs is out_num.  The number of
+     * hypervisor write-only iovecs is in_num.
+     */
+    int head;
+    unsigned int out_num = 0, in_num = 0;
+    unsigned int num_queued;
+
+    for (;;) {
+        /* Disable guest->host notifies to avoid unnecessary vmexits */
+        vring_disable_notification(s->vdev, &s->vring);
+
+        for (;;) {
+            head = vring_pop(s->vdev, &s->vring, iov, end, &out_num, &in_num);
+            if (head < 0) {
+                break; /* no more requests */
+            }
+
+            trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
+                                                        head);
+
+            if (process_request(&s->ioqueue, iov, out_num, in_num, head) < 0) {
+                vring_set_broken(&s->vring);
+                break;
+            }
+            iov += out_num + in_num;
+        }
+
+        if (likely(head == -EAGAIN)) { /* vring emptied */
+            /* Re-enable guest->host notifies and stop processing the vring.
+             * But if the guest has snuck in more descriptors, keep processing.
+             */
+            if (vring_enable_notification(s->vdev, &s->vring)) {
+                break;
+            }
+        } else { /* head == -ENOBUFS or fatal error, iovecs[] is depleted */
+            /* Since there are no iovecs[] left, stop processing for now.  Do
+             * not re-enable guest->host notifies since the I/O completion
+             * handler knows to check for more vring descriptors anyway.
+             */
+            break;
+        }
+    }
+
+    num_queued = ioq_num_queued(&s->ioqueue);
+    if (num_queued > 0) {
+        s->num_reqs += num_queued;
+
+        int rc = ioq_submit(&s->ioqueue);
+        if (unlikely(rc < 0)) {
+            fprintf(stderr, "ioq_submit failed %d\n", rc);
+            exit(1);
+        }
+    }
+}
+
+static void handle_io(EventHandler *handler)
+{
+    VirtIOBlockDataPlane *s = container_of(handler, VirtIOBlockDataPlane,
+                                           io_handler);
+
+    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
+        notify_guest(s);
+    }
+
+    /* If there were more requests than iovecs, the vring will not be empty yet
+     * so check again.  There should now be enough resources to process more
+     * requests.
+     */
+    if (unlikely(vring_more_avail(&s->vring))) {
+        handle_notify(&s->notify_handler);
+    }
+}
+
+static void *data_plane_thread(void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+
+    do {
+        event_poll(&s->event_poll);
+    } while (s->started || s->num_reqs > 0);
+    return NULL;
+}
+
+static void start_data_plane_bh(void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+
+    qemu_bh_delete(s->start_bh);
+    s->start_bh = NULL;
+    qemu_thread_create(&s->thread, data_plane_thread,
+                       s, QEMU_THREAD_JOINABLE);
+}
+
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
+                                  VirtIOBlockDataPlane **dataplane)
+{
+    VirtIOBlockDataPlane *s;
+    int fd;
+
+    *dataplane = NULL;
+
+    if (!blk->data_plane) {
+        return true;
+    }
+
+    if (blk->scsi) {
+        error_report("device is incompatible with x-data-plane, use scsi=off");
+        return false;
+    }
+
+    if (blk->config_wce) {
+        error_report("device is incompatible with x-data-plane, "
+                     "use config-wce=off");
+        return false;
+    }
+
+    fd = raw_get_aio_fd(blk->conf.bs);
+    if (fd < 0) {
+        error_report("drive is incompatible with x-data-plane, "
+                     "use format=raw,cache=none,aio=native");
+        return false;
+    }
+
+    s = g_new0(VirtIOBlockDataPlane, 1);
+    s->vdev = vdev;
+    s->fd = fd;
+    s->blk = blk;
+
+    /* Prevent block operations that conflict with data plane thread */
+    bdrv_set_in_use(blk->conf.bs, 1);
+
+    error_setg(&s->migration_blocker,
+            "x-data-plane does not support migration");
+    migrate_add_blocker(s->migration_blocker);
+
+    *dataplane = s;
+    return true;
+}
+
+void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
+{
+    if (!s) {
+        return;
+    }
+
+    virtio_blk_data_plane_stop(s);
+    migrate_del_blocker(s->migration_blocker);
+    error_free(s->migration_blocker);
+    bdrv_set_in_use(s->blk->conf.bs, 0);
+    g_free(s);
+}
+
+void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
+{
+    VirtQueue *vq;
+    int i;
+
+    if (s->started) {
+        return;
+    }
+
+    vq = virtio_get_queue(s->vdev, 0);
+    if (!vring_setup(&s->vring, s->vdev, 0)) {
+        return;
+    }
+
+    event_poll_init(&s->event_poll);
+
+    /* Set up guest notifier (irq) */
+    if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque,
+                                              true) != 0) {
+        fprintf(stderr, "virtio-blk failed to set guest notifier, "
+                "ensure -enable-kvm is set\n");
+        exit(1);
+    }
+    s->guest_notifier = virtio_queue_get_guest_notifier(vq);
+
+    /* Set up virtqueue notify */
+    if (s->vdev->binding->set_host_notifier(s->vdev->binding_opaque,
+                                            0, true) != 0) {
+        fprintf(stderr, "virtio-blk failed to set host notifier\n");
+        exit(1);
+    }
+    event_poll_add(&s->event_poll, &s->notify_handler,
+                   virtio_queue_get_host_notifier(vq),
+                   handle_notify);
+
+    /* Set up ioqueue */
+    ioq_init(&s->ioqueue, s->fd, REQ_MAX);
+    for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
+        ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
+    }
+    event_poll_add(&s->event_poll, &s->io_handler,
+                   ioq_get_notifier(&s->ioqueue), handle_io);
+
+    s->started = true;
+    trace_virtio_blk_data_plane_start(s);
+
+    /* Kick right away to begin processing requests already in vring */
+    event_notifier_set(virtio_queue_get_host_notifier(vq));
+
+    /* Spawn thread in BH so it inherits iothread cpusets */
+    s->start_bh = qemu_bh_new(start_data_plane_bh, s);
+    qemu_bh_schedule(s->start_bh);
+}
+
+void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
+{
+    if (!s->started) {
+        return;
+    }
+    s->started = false;
+    trace_virtio_blk_data_plane_stop(s);
+
+    /* Stop thread or cancel pending thread creation BH */
+    if (s->start_bh) {
+        qemu_bh_delete(s->start_bh);
+        s->start_bh = NULL;
+    } else {
+        event_poll_notify(&s->event_poll);
+        qemu_thread_join(&s->thread);
+    }
+
+    ioq_cleanup(&s->ioqueue);
+
+    s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
+
+    event_poll_cleanup(&s->event_poll);
+
+    /* Clean up guest notifier (irq) */
+    s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, false);
+
+    vring_teardown(&s->vring);
+}
diff --git a/hw/dataplane/virtio-blk.h b/hw/dataplane/virtio-blk.h
new file mode 100644
index 0000000..1e8fdfe
--- /dev/null
+++ b/hw/dataplane/virtio-blk.h
@@ -0,0 +1,29 @@
+/*
+ * Dedicated thread for virtio-blk I/O processing
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_DATAPLANE_VIRTIO_BLK_H
+#define HW_DATAPLANE_VIRTIO_BLK_H
+
+#include "hw/virtio.h"
+
+typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane;
+
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
+                                  VirtIOBlockDataPlane **dataplane);
+void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s);
+
+#endif /* HW_DATAPLANE_VIRTIO_BLK_H */
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index 454f445..43ca492 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -105,6 +105,7 @@ struct VirtIOBlkConf
     char *serial;
     uint32_t scsi;
     uint32_t config_wce;
+    uint32_t data_plane;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
diff --git a/trace-events b/trace-events
index 167d776..4023a4c 100644
--- a/trace-events
+++ b/trace-events
@@ -98,6 +98,12 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 
+# hw/dataplane/virtio-blk.c
+virtio_blk_data_plane_start(void *s) "dataplane %p"
+virtio_blk_data_plane_stop(void *s) "dataplane %p"
+virtio_blk_data_plane_process_request(void *s, unsigned int out_num, unsigned int in_num, unsigned int head) "dataplane %p out_num %u in_num %u head %u"
+virtio_blk_data_plane_complete_request(void *s, unsigned int head, int ret) "dataplane %p head %u ret %d"
+
 # hw/dataplane/vring.c
 vring_setup(uint64_t physical, void *desc, void *avail, void *used) "vring physical %#"PRIx64" desc %p avail %p used %p"
 
commit 8a873ba78069ef81c4ef073a0bd703172c8b3312
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Mon Dec 10 13:14:39 2012 +0100

    virtio-blk: restore VirtIOBlkConf->config_wce flag
    
    Two slightly different versions of a patch to conditionally set
    VIRTIO_BLK_F_CONFIG_WCE through the "config-wce" qdev property have been
    applied (ea776abca and eec7f96c2).  David Gibson
    <david at gibson.dropbear.id.au> noticed that the "config-wce"
    property is broken as a result and fixed it recently.
    
    The fix sets the host_features VIRTIO_BLK_F_CONFIG_WCE bit from a qdev
    property.  Unfortunately, the virtio device then has no chance to test
    for the presence of the feature bit during virtio_blk_init().
    
    Therefore, reinstate the VirtIOBlkConf->config_wce flag.  Drop the
    duplicate qdev property to set the host_features bit.  The
    VirtIOBlkConf->config_wce flag will be used by virtio-blk-data-plane in
    a later patch.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 90cfa24..f004148 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -524,6 +524,9 @@ static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
     features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
     features |= (1 << VIRTIO_BLK_F_SCSI);
 
+    if (s->blk->config_wce) {
+        features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
+    }
     if (bdrv_enable_write_cache(s->bs))
         features |= (1 << VIRTIO_BLK_F_WCE);
 
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index 651a000..454f445 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -104,10 +104,10 @@ struct VirtIOBlkConf
     BlockConf conf;
     char *serial;
     uint32_t scsi;
+    uint32_t config_wce;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
-        DEFINE_VIRTIO_COMMON_FEATURES(_state, _field), \
-        DEFINE_PROP_BIT("config-wce", _state, _field, VIRTIO_BLK_F_CONFIG_WCE, true)
+        DEFINE_VIRTIO_COMMON_FEATURES(_state, _field)
 
 #endif
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d2d2454..3cab783 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -894,6 +894,7 @@ static Property virtio_blk_properties[] = {
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true),
 #endif
+    DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
commit 530c0bbd73e1b658c9266582072847de1fbdff10
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Thu Nov 22 16:06:06 2012 +0100

    iov: add qemu_iovec_concat_iov()
    
    The qemu_iovec_concat() function copies a subset of a QEMUIOVector.  The
    new qemu_iovec_concat_iov() function does the same for an iov/cnt pair.
    
    It is easy to define qemu_iovec_concat() in terms of
    qemu_iovec_concat_iov().  The existing code is mostly unchanged, except
    for the assertion src->size >= soffset, which cannot be efficiently
    checked upfront on an iov/cnt pair.  Instead we assert upon hitting the
    end of src with an unsatisfied soffset.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/include/qemu-common.h b/include/qemu-common.h
index 6871cab..2b83de3 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -329,6 +329,9 @@ void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
 void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
 void qemu_iovec_concat(QEMUIOVector *dst,
                        QEMUIOVector *src, size_t soffset, size_t sbytes);
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+                           struct iovec *src_iov, unsigned int src_cnt,
+                           size_t soffset, size_t sbytes);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/iov.c b/iov.c
index 92ad77b..c0f5c56 100644
--- a/iov.c
+++ b/iov.c
@@ -289,34 +289,49 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
 }
 
 /*
- * Concatenates (partial) iovecs from src to the end of dst.
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
  * It starts copying after skipping `soffset' bytes at the
  * beginning of src and adds individual vectors from src to
  * dst copies up to `sbytes' bytes total, or up to the end
- * of src if it comes first.  This way, it is okay to specify
+ * of src_iov if it comes first.  This way, it is okay to specify
  * very large value for `sbytes' to indicate "up to the end
  * of src".
  * Only vector pointers are processed, not the actual data buffers.
  */
-void qemu_iovec_concat(QEMUIOVector *dst,
-                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+                           struct iovec *src_iov, unsigned int src_cnt,
+                           size_t soffset, size_t sbytes)
 {
     int i;
     size_t done;
-    struct iovec *siov = src->iov;
     assert(dst->nalloc != -1);
-    assert(src->size >= soffset);
-    for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
-        if (soffset < siov[i].iov_len) {
-            size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
-            qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
+    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
+        if (soffset < src_iov[i].iov_len) {
+            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
+            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
             done += len;
             soffset = 0;
         } else {
-            soffset -= siov[i].iov_len;
+            soffset -= src_iov[i].iov_len;
         }
     }
-    /* return done; */
+    assert(soffset == 0); /* offset beyond end of src */
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+    qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
 void qemu_iovec_destroy(QEMUIOVector *qiov)
commit 8962e44fe438a051aff9f43209363f599be33624
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 21 19:18:26 2012 +0100

    test-iov: add iov_discard_front/back() testcases
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/test-iov.c b/tests/test-iov.c
index a480bc8..46e4ddd 100644
--- a/tests/test-iov.c
+++ b/tests/test-iov.c
@@ -250,11 +250,161 @@ static void test_io(void)
 #endif
 }
 
+static void test_discard_front(void)
+{
+    struct iovec *iov;
+    struct iovec *iov_tmp;
+    unsigned int iov_cnt;
+    unsigned int iov_cnt_tmp;
+    void *old_base;
+    size_t size;
+    size_t ret;
+
+    /* Discard zero bytes */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, 0);
+    g_assert(ret == 0);
+    g_assert(iov_tmp == iov);
+    g_assert(iov_cnt_tmp == iov_cnt);
+    iov_free(iov, iov_cnt);
+
+    /* Discard more bytes than vector size */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    size = iov_size(iov, iov_cnt);
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, size + 1);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == 0);
+    iov_free(iov, iov_cnt);
+
+    /* Discard entire vector */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    size = iov_size(iov, iov_cnt);
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == 0);
+    iov_free(iov, iov_cnt);
+
+    /* Discard within first element */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    old_base = iov->iov_base;
+    size = g_test_rand_int_range(1, iov->iov_len);
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_tmp == iov);
+    g_assert(iov_cnt_tmp == iov_cnt);
+    g_assert(iov_tmp->iov_base == old_base + size);
+    iov_tmp->iov_base = old_base; /* undo before g_free() */
+    iov_free(iov, iov_cnt);
+
+    /* Discard entire first element */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, iov->iov_len);
+    g_assert(ret == iov->iov_len);
+    g_assert(iov_tmp == iov + 1);
+    g_assert(iov_cnt_tmp == iov_cnt - 1);
+    iov_free(iov, iov_cnt);
+
+    /* Discard within second element */
+    iov_random(&iov, &iov_cnt);
+    iov_tmp = iov;
+    iov_cnt_tmp = iov_cnt;
+    old_base = iov[1].iov_base;
+    size = iov->iov_len + g_test_rand_int_range(1, iov[1].iov_len);
+    ret = iov_discard_front(&iov_tmp, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_tmp == iov + 1);
+    g_assert(iov_cnt_tmp == iov_cnt - 1);
+    g_assert(iov_tmp->iov_base == old_base + (size - iov->iov_len));
+    iov_tmp->iov_base = old_base; /* undo before g_free() */
+    iov_free(iov, iov_cnt);
+}
+
+static void test_discard_back(void)
+{
+    struct iovec *iov;
+    unsigned int iov_cnt;
+    unsigned int iov_cnt_tmp;
+    void *old_base;
+    size_t size;
+    size_t ret;
+
+    /* Discard zero bytes */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    ret = iov_discard_back(iov, &iov_cnt_tmp, 0);
+    g_assert(ret == 0);
+    g_assert(iov_cnt_tmp == iov_cnt);
+    iov_free(iov, iov_cnt);
+
+    /* Discard more bytes than vector size */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    size = iov_size(iov, iov_cnt);
+    ret = iov_discard_back(iov, &iov_cnt_tmp, size + 1);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == 0);
+    iov_free(iov, iov_cnt);
+
+    /* Discard entire vector */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    size = iov_size(iov, iov_cnt);
+    ret = iov_discard_back(iov, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == 0);
+    iov_free(iov, iov_cnt);
+
+    /* Discard within last element */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    old_base = iov[iov_cnt - 1].iov_base;
+    size = g_test_rand_int_range(1, iov[iov_cnt - 1].iov_len);
+    ret = iov_discard_back(iov, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == iov_cnt);
+    g_assert(iov[iov_cnt - 1].iov_base == old_base);
+    iov_free(iov, iov_cnt);
+
+    /* Discard entire last element */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    old_base = iov[iov_cnt - 1].iov_base;
+    size = iov[iov_cnt - 1].iov_len;
+    ret = iov_discard_back(iov, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == iov_cnt - 1);
+    iov_free(iov, iov_cnt);
+
+    /* Discard within second-to-last element */
+    iov_random(&iov, &iov_cnt);
+    iov_cnt_tmp = iov_cnt;
+    old_base = iov[iov_cnt - 2].iov_base;
+    size = iov[iov_cnt - 1].iov_len +
+           g_test_rand_int_range(1, iov[iov_cnt - 2].iov_len);
+    ret = iov_discard_back(iov, &iov_cnt_tmp, size);
+    g_assert(ret == size);
+    g_assert(iov_cnt_tmp == iov_cnt - 1);
+    g_assert(iov[iov_cnt - 2].iov_base == old_base);
+    iov_free(iov, iov_cnt);
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
     g_test_rand_int();
     g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf);
     g_test_add_func("/basic/iov/io", test_io);
+    g_test_add_func("/basic/iov/discard-front", test_discard_front);
+    g_test_add_func("/basic/iov/discard-back", test_discard_back);
     return g_test_run();
 }
commit d02776350d9c76348988fc9e58a64a4f6b1a9f61
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 21 17:41:10 2012 +0100

    iov: add iov_discard_front/back() to remove data
    
    The iov_discard_front/back() functions remove data from the front or
    back of the vector.  This is useful when peeling off header/footer
    structs.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index d06f8b9..68d25f2 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -99,4 +99,17 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
                  const struct iovec *iov, unsigned int iov_cnt,
                  size_t offset, size_t bytes);
 
+/*
+ * Remove a given number of bytes from the front or back of a vector.
+ * This may update iov and/or iov_cnt to exclude iovec elements that are
+ * no longer required.
+ *
+ * The number of bytes actually discarded is returned.  This number may be
+ * smaller than requested if the vector is too small.
+ */
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+                         size_t bytes);
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+                        size_t bytes);
+
 #endif
diff --git a/iov.c b/iov.c
index 419e419..92ad77b 100644
--- a/iov.c
+++ b/iov.c
@@ -354,3 +354,54 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
 {
     return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
 }
+
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+                         size_t bytes)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    for (cur = *iov; *iov_cnt > 0; cur++) {
+        if (cur->iov_len > bytes) {
+            cur->iov_base += bytes;
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        *iov_cnt -= 1;
+    }
+
+    *iov = cur;
+    return total;
+}
+
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+                        size_t bytes)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    if (*iov_cnt == 0) {
+        return 0;
+    }
+
+    cur = iov + (*iov_cnt - 1);
+
+    while (*iov_cnt > 0) {
+        if (cur->iov_len > bytes) {
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        cur--;
+        *iov_cnt -= 1;
+    }
+
+    return total;
+}
commit 3e9ec521711ed033476098cfc7f23c992cc606a2
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:30:09 2012 +0100

    dataplane: add Linux AIO request queue
    
    The IOQueue has a pool of iocb structs and a function to add new
    read/write requests.  Multiple requests can be added before calling the
    submit function to actually tell the host kernel to begin I/O.  This
    allows callers to batch requests and submit them in one go.
    
    The actual I/O is performed using Linux AIO.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
index e26bd7d..abd408f 100644
--- a/hw/dataplane/Makefile.objs
+++ b/hw/dataplane/Makefile.objs
@@ -1,3 +1,3 @@
 ifeq ($(CONFIG_VIRTIO), y)
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o ioq.o
 endif
diff --git a/hw/dataplane/ioq.c b/hw/dataplane/ioq.c
new file mode 100644
index 0000000..0c9f5c4
--- /dev/null
+++ b/hw/dataplane/ioq.c
@@ -0,0 +1,117 @@
+/*
+ * Linux AIO request queue
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/dataplane/ioq.h"
+
+void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
+{
+    int rc;
+
+    ioq->fd = fd;
+    ioq->max_reqs = max_reqs;
+
+    memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
+    rc = io_setup(max_reqs, &ioq->io_ctx);
+    if (rc != 0) {
+        fprintf(stderr, "ioq io_setup failed %d\n", rc);
+        exit(1);
+    }
+
+    rc = event_notifier_init(&ioq->io_notifier, 0);
+    if (rc != 0) {
+        fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
+        exit(1);
+    }
+
+    ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
+    ioq->freelist_idx = 0;
+
+    ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
+    ioq->queue_idx = 0;
+}
+
+void ioq_cleanup(IOQueue *ioq)
+{
+    g_free(ioq->freelist);
+    g_free(ioq->queue);
+
+    event_notifier_cleanup(&ioq->io_notifier);
+    io_destroy(ioq->io_ctx);
+}
+
+EventNotifier *ioq_get_notifier(IOQueue *ioq)
+{
+    return &ioq->io_notifier;
+}
+
+struct iocb *ioq_get_iocb(IOQueue *ioq)
+{
+    /* Underflow cannot happen since ioq is sized for max_reqs */
+    assert(ioq->freelist_idx != 0);
+
+    struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
+    ioq->queue[ioq->queue_idx++] = iocb;
+    return iocb;
+}
+
+void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
+{
+    /* Overflow cannot happen since ioq is sized for max_reqs */
+    assert(ioq->freelist_idx != ioq->max_reqs);
+
+    ioq->freelist[ioq->freelist_idx++] = iocb;
+}
+
+struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
+                      unsigned int count, long long offset)
+{
+    struct iocb *iocb = ioq_get_iocb(ioq);
+
+    if (read) {
+        io_prep_preadv(iocb, ioq->fd, iov, count, offset);
+    } else {
+        io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
+    }
+    io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
+    return iocb;
+}
+
+int ioq_submit(IOQueue *ioq)
+{
+    int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
+    ioq->queue_idx = 0; /* reset */
+    return rc;
+}
+
+int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
+                       void *opaque)
+{
+    struct io_event events[ioq->max_reqs];
+    int nevents, i;
+
+    do {
+        nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
+    } while (nevents < 0 && errno == EINTR);
+    if (nevents < 0) {
+        return nevents;
+    }
+
+    for (i = 0; i < nevents; i++) {
+        ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
+
+        completion(events[i].obj, ret, opaque);
+        ioq_put_iocb(ioq, events[i].obj);
+    }
+    return nevents;
+}
diff --git a/hw/dataplane/ioq.h b/hw/dataplane/ioq.h
new file mode 100644
index 0000000..b49b5de
--- /dev/null
+++ b/hw/dataplane/ioq.h
@@ -0,0 +1,57 @@
+/*
+ * Linux AIO request queue
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef IOQ_H
+#define IOQ_H
+
+#include <libaio.h>
+#include "qemu/event_notifier.h"
+
+typedef struct {
+    int fd;                         /* file descriptor */
+    unsigned int max_reqs;          /* max length of freelist and queue */
+
+    io_context_t io_ctx;            /* Linux AIO context */
+    EventNotifier io_notifier;      /* Linux AIO eventfd */
+
+    /* Requests can complete in any order so a free list is necessary to manage
+     * available iocbs.
+     */
+    struct iocb **freelist;         /* free iocbs */
+    unsigned int freelist_idx;
+
+    /* Multiple requests are queued up before submitting them all in one go */
+    struct iocb **queue;            /* queued iocbs */
+    unsigned int queue_idx;
+} IOQueue;
+
+void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
+void ioq_cleanup(IOQueue *ioq);
+EventNotifier *ioq_get_notifier(IOQueue *ioq);
+struct iocb *ioq_get_iocb(IOQueue *ioq);
+void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
+struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
+                      unsigned int count, long long offset);
+int ioq_submit(IOQueue *ioq);
+
+static inline unsigned int ioq_num_queued(IOQueue *ioq)
+{
+    return ioq->queue_idx;
+}
+
+typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
+int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
+                       void *opaque);
+
+#endif /* IOQ_H */
commit 71973b046120a13df4eaa9143bed5ba8a67abc7f
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:23:00 2012 +0100

    dataplane: add event loop
    
    Outside the safety of the global mutex we need to poll on file
    descriptors.  I found epoll(2) is a convenient way to do that, although
    other options could replace this module in the future (such as an
    AioContext-based loop or glib's GMainLoop).
    
    One important feature of this small event loop implementation is that
    the loop can be terminated in a thread-safe way.  This allows QEMU to
    stop the data plane thread cleanly.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
index 34e6d57..e26bd7d 100644
--- a/hw/dataplane/Makefile.objs
+++ b/hw/dataplane/Makefile.objs
@@ -1,3 +1,3 @@
 ifeq ($(CONFIG_VIRTIO), y)
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o event-poll.o
 endif
diff --git a/hw/dataplane/event-poll.c b/hw/dataplane/event-poll.c
new file mode 100644
index 0000000..2b55c6e
--- /dev/null
+++ b/hw/dataplane/event-poll.c
@@ -0,0 +1,100 @@
+/*
+ * Event loop with file descriptor polling
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/epoll.h>
+#include "hw/dataplane/event-poll.h"
+
+/* Add an event notifier and its callback for polling */
+void event_poll_add(EventPoll *poll, EventHandler *handler,
+                    EventNotifier *notifier, EventCallback *callback)
+{
+    struct epoll_event event = {
+        .events = EPOLLIN,
+        .data.ptr = handler,
+    };
+    handler->notifier = notifier;
+    handler->callback = callback;
+    if (epoll_ctl(poll->epoll_fd, EPOLL_CTL_ADD,
+                  event_notifier_get_fd(notifier), &event) != 0) {
+        fprintf(stderr, "failed to add event handler to epoll: %m\n");
+        exit(1);
+    }
+}
+
+/* Event callback for stopping event_poll() */
+static void handle_stop(EventHandler *handler)
+{
+    /* Do nothing */
+}
+
+void event_poll_init(EventPoll *poll)
+{
+    /* Create epoll file descriptor */
+    poll->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+    if (poll->epoll_fd < 0) {
+        fprintf(stderr, "epoll_create1 failed: %m\n");
+        exit(1);
+    }
+
+    /* Set up stop notifier */
+    if (event_notifier_init(&poll->stop_notifier, 0) < 0) {
+        fprintf(stderr, "failed to init stop notifier\n");
+        exit(1);
+    }
+    event_poll_add(poll, &poll->stop_handler,
+                   &poll->stop_notifier, handle_stop);
+}
+
+void event_poll_cleanup(EventPoll *poll)
+{
+    event_notifier_cleanup(&poll->stop_notifier);
+    close(poll->epoll_fd);
+    poll->epoll_fd = -1;
+}
+
+/* Block until the next event and invoke its callback */
+void event_poll(EventPoll *poll)
+{
+    EventHandler *handler;
+    struct epoll_event event;
+    int nevents;
+
+    /* Wait for the next event.  Only do one event per call to keep the
+     * function simple, this could be changed later. */
+    do {
+        nevents = epoll_wait(poll->epoll_fd, &event, 1, -1);
+    } while (nevents < 0 && errno == EINTR);
+    if (unlikely(nevents != 1)) {
+        fprintf(stderr, "epoll_wait failed: %m\n");
+        exit(1); /* should never happen */
+    }
+
+    /* Find out which event handler has become active */
+    handler = event.data.ptr;
+
+    /* Clear the eventfd */
+    event_notifier_test_and_clear(handler->notifier);
+
+    /* Handle the event */
+    handler->callback(handler);
+}
+
+/* Stop event_poll()
+ *
+ * This function can be used from another thread.
+ */
+void event_poll_notify(EventPoll *poll)
+{
+    event_notifier_set(&poll->stop_notifier);
+}
diff --git a/hw/dataplane/event-poll.h b/hw/dataplane/event-poll.h
new file mode 100644
index 0000000..3e8d3ec
--- /dev/null
+++ b/hw/dataplane/event-poll.h
@@ -0,0 +1,40 @@
+/*
+ * Event loop with file descriptor polling
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef EVENT_POLL_H
+#define EVENT_POLL_H
+
+#include "qemu/event_notifier.h"
+
+typedef struct EventHandler EventHandler;
+typedef void EventCallback(EventHandler *handler);
+struct EventHandler {
+    EventNotifier *notifier;        /* eventfd */
+    EventCallback *callback;        /* callback function */
+};
+
+typedef struct {
+    int epoll_fd;                   /* epoll(2) file descriptor */
+    EventNotifier stop_notifier;    /* stop poll notifier */
+    EventHandler stop_handler;      /* stop poll handler */
+} EventPoll;
+
+void event_poll_add(EventPoll *poll, EventHandler *handler,
+                    EventNotifier *notifier, EventCallback *callback);
+void event_poll_init(EventPoll *poll);
+void event_poll_cleanup(EventPoll *poll);
+void event_poll(EventPoll *poll);
+void event_poll_notify(EventPoll *poll);
+
+#endif /* EVENT_POLL_H */
commit 88807f89d945acad54c8365ff7b6ef0f0d0ddd56
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:15:50 2012 +0100

    dataplane: add virtqueue vring code
    
    The virtio-blk-data-plane cannot access memory using the usual QEMU
    functions since it executes outside the global mutex and the memory APIs
    are at this time not thread-safe.
    
    This patch introduces a virtqueue module based on the kernel's vhost
    vring code.  The trick is that we map guest memory ahead of time and
    access it cheaply outside the global mutex.
    
    Once the hardware emulation code can execute outside the global mutex it
    will be possible to drop this code.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
index 8c8dea1..34e6d57 100644
--- a/hw/dataplane/Makefile.objs
+++ b/hw/dataplane/Makefile.objs
@@ -1,3 +1,3 @@
 ifeq ($(CONFIG_VIRTIO), y)
-common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o
 endif
diff --git a/hw/dataplane/vring.c b/hw/dataplane/vring.c
new file mode 100644
index 0000000..d5d4ef4
--- /dev/null
+++ b/hw/dataplane/vring.c
@@ -0,0 +1,362 @@
+/* Copyright 2012 Red Hat, Inc.
+ * Copyright IBM, Corp. 2012
+ *
+ * Based on Linux 2.6.39 vhost code:
+ * Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst at redhat.com>
+ *         Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/virtual/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "trace.h"
+#include "hw/dataplane/vring.h"
+
+/* Map the guest's vring to host memory */
+bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
+{
+    hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
+    hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
+    void *vring_ptr;
+
+    vring->broken = false;
+
+    hostmem_init(&vring->hostmem);
+    vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
+    if (!vring_ptr) {
+        error_report("Failed to map vring "
+                     "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
+                     vring_addr, vring_size);
+        vring->broken = true;
+        return false;
+    }
+
+    vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
+
+    vring->last_avail_idx = 0;
+    vring->last_used_idx = 0;
+    vring->signalled_used = 0;
+    vring->signalled_used_valid = false;
+
+    trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
+                      vring->vr.desc, vring->vr.avail, vring->vr.used);
+    return true;
+}
+
+void vring_teardown(Vring *vring)
+{
+    hostmem_finalize(&vring->hostmem);
+}
+
+/* Disable guest->host notifies */
+void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
+{
+    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+        vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
+    }
+}
+
+/* Enable guest->host notifies
+ *
+ * Return true if the vring is empty, false if there are more requests.
+ */
+bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
+{
+    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
+    } else {
+        vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+    }
+    smp_mb(); /* ensure update is seen before reading avail_idx */
+    return !vring_more_avail(vring);
+}
+
+/* Should the guest be notified about newly used buffers?
+ * This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
+bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
+{
+    uint16_t old, new;
+    bool v;
+    /* Flush out used index updates.  This is paired with the barrier that
+     * the Guest executes when enabling interrupts. */
+    smp_mb();
+
+    if ((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
+        unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
+        return true;
+    }
+
+    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+        return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
+    }
+    old = vring->signalled_used;
+    v = vring->signalled_used_valid;
+    new = vring->signalled_used = vring->last_used_idx;
+    vring->signalled_used_valid = true;
+
+    if (unlikely(!v)) {
+        return true;
+    }
+
+    return vring_need_event(vring_used_event(&vring->vr), new, old);
+}
+
+/* Translate the descriptors of an indirect table into iov[] entries.
+ * This is stolen from linux/drivers/vhost/vhost.c. */
+static int get_indirect(Vring *vring,
+                        struct iovec iov[], struct iovec *iov_end,
+                        unsigned int *out_num, unsigned int *in_num,
+                        struct vring_desc *indirect)
+{
+    struct vring_desc desc;
+    unsigned int i = 0, count, found = 0;
+
+    /* Sanity check */
+    if (unlikely(indirect->len % sizeof(desc))) {
+        error_report("Invalid length in indirect descriptor: "
+                     "len %#x not multiple of %#zx",
+                     indirect->len, sizeof(desc));
+        vring->broken = true;
+        return -EFAULT;
+    }
+
+    count = indirect->len / sizeof(desc);
+    /* Buffers are chained via a 16 bit next field, so
+     * we can have at most 2^16 of these. */
+    if (unlikely(count > USHRT_MAX + 1)) {
+        error_report("Indirect buffer length too big: %u", indirect->len);
+        vring->broken = true;
+        return -EFAULT;
+    }
+
+    do {
+        struct vring_desc *desc_ptr;
+
+        /* Translate indirect descriptor */
+        desc_ptr = hostmem_lookup(&vring->hostmem,
+                                  indirect->addr + found * sizeof(desc),
+                                  sizeof(desc), false);
+        if (!desc_ptr) {
+            error_report("Failed to map indirect descriptor "
+                         "addr %#" PRIx64 " len %zu",
+                         (uint64_t)indirect->addr + found * sizeof(desc),
+                         sizeof(desc));
+            vring->broken = true;
+            return -EFAULT;
+        }
+        desc = *desc_ptr;
+
+        /* Ensure descriptor has been loaded before accessing fields */
+        barrier(); /* read_barrier_depends(); */
+
+        if (unlikely(++found > count)) {
+            error_report("Loop detected: last one at %u "
+                         "indirect size %u", i, count);
+            vring->broken = true;
+            return -EFAULT;
+        }
+
+        if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+            error_report("Nested indirect descriptor");
+            vring->broken = true;
+            return -EFAULT;
+        }
+
+        /* Stop for now if there are not enough iovecs available. */
+        if (iov >= iov_end) {
+            return -ENOBUFS;
+        }
+
+        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+                                       desc.flags & VRING_DESC_F_WRITE);
+        if (!iov->iov_base) {
+            error_report("Failed to map indirect descriptor "
+                         "addr %#" PRIx64 " len %u",
+                         (uint64_t)desc.addr, desc.len);
+            vring->broken = true;
+            return -EFAULT;
+        }
+        iov->iov_len = desc.len;
+        iov++;
+
+        /* If this is an input descriptor, increment that count. */
+        if (desc.flags & VRING_DESC_F_WRITE) {
+            *in_num += 1;
+        } else {
+            /* Output descriptors must precede all input descriptors. */
+            if (unlikely(*in_num)) {
+                error_report("Indirect descriptor "
+                             "has out after in: idx %u", i);
+                vring->broken = true;
+                return -EFAULT;
+            }
+            *out_num += 1;
+        }
+        i = desc.next;
+    } while (desc.flags & VRING_DESC_F_NEXT);
+    return 0;
+}
+
+/* This looks in the virtqueue for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found.  A negative code is
+ * returned on error: -EAGAIN if no new buffer is available, -ENOBUFS if
+ * there are not enough iovecs, and -EFAULT if the vring is broken.
+ *
+ * Stolen from linux/drivers/vhost/vhost.c.
+ */
+int vring_pop(VirtIODevice *vdev, Vring *vring,
+              struct iovec iov[], struct iovec *iov_end,
+              unsigned int *out_num, unsigned int *in_num)
+{
+    struct vring_desc desc;
+    unsigned int i, head, found = 0, num = vring->vr.num;
+    uint16_t avail_idx, last_avail_idx;
+
+    /* If there was a fatal error then refuse operation */
+    if (vring->broken) {
+        return -EFAULT;
+    }
+
+    /* Check it isn't doing very strange things with descriptor numbers. */
+    last_avail_idx = vring->last_avail_idx;
+    avail_idx = vring->vr.avail->idx;
+    barrier(); /* load indices now and not again later */
+
+    if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
+        error_report("Guest moved used index from %u to %u",
+                     last_avail_idx, avail_idx);
+        vring->broken = true;
+        return -EFAULT;
+    }
+
+    /* If there's nothing new since last we looked. */
+    if (avail_idx == last_avail_idx) {
+        return -EAGAIN;
+    }
+
+    /* Only get avail ring entries after they have been exposed by guest. */
+    smp_rmb();
+
+    /* Grab the next descriptor number they're advertising, and increment
+     * the index we've seen. */
+    head = vring->vr.avail->ring[last_avail_idx % num];
+
+    /* If their number is silly, that's an error. */
+    if (unlikely(head >= num)) {
+        error_report("Guest says index %u > %u is available", head, num);
+        vring->broken = true;
+        return -EFAULT;
+    }
+
+    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
+    }
+
+    /* When we start there are none of either input nor output. */
+    *out_num = *in_num = 0;
+
+    i = head;
+    do {
+        if (unlikely(i >= num)) {
+            error_report("Desc index is %u > %u, head = %u", i, num, head);
+            vring->broken = true;
+            return -EFAULT;
+        }
+        if (unlikely(++found > num)) {
+            error_report("Loop detected: last one at %u vq size %u head %u",
+                         i, num, head);
+            vring->broken = true;
+            return -EFAULT;
+        }
+        desc = vring->vr.desc[i];
+
+        /* Ensure descriptor is loaded before accessing fields */
+        barrier();
+
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
+            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
+            if (ret < 0) {
+                return ret;
+            }
+            continue;
+        }
+
+        /* If there are not enough iovecs left, stop for now.  The caller
+         * should check if there are more descs available once they have dealt
+         * with the current set.
+         */
+        if (iov >= iov_end) {
+            return -ENOBUFS;
+        }
+
+        /* TODO handle non-contiguous memory across region boundaries */
+        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+                                       desc.flags & VRING_DESC_F_WRITE);
+        if (!iov->iov_base) {
+            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
+                         (uint64_t)desc.addr, desc.len);
+            vring->broken = true;
+            return -EFAULT;
+        }
+        iov->iov_len  = desc.len;
+        iov++;
+
+        if (desc.flags & VRING_DESC_F_WRITE) {
+            /* If this is an input descriptor,
+             * increment that count. */
+            *in_num += 1;
+        } else {
+            /* If it's an output descriptor, they're all supposed
+             * to come before any input descriptors. */
+            if (unlikely(*in_num)) {
+                error_report("Descriptor has out after in: idx %d", i);
+                vring->broken = true;
+                return -EFAULT;
+            }
+            *out_num += 1;
+        }
+        i = desc.next;
+    } while (desc.flags & VRING_DESC_F_NEXT);
+
+    /* On success, increment avail index. */
+    vring->last_avail_idx++;
+    return head;
+}
+
+/* After we've used one of their buffers, we tell them about it.
+ *
+ * Stolen from linux/drivers/vhost/vhost.c.
+ */
+void vring_push(Vring *vring, unsigned int head, int len)
+{
+    struct vring_used_elem *used;
+    uint16_t new;
+
+    /* Don't touch vring if a fatal error occurred */
+    if (vring->broken) {
+        return;
+    }
+
+    /* The virtqueue contains a ring of used buffers.  Get a pointer to the
+     * next entry in that used ring. */
+    used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
+    used->id = head;
+    used->len = len;
+
+    /* Make sure buffer is written before we update index. */
+    smp_wmb();
+
+    new = vring->vr.used->idx = ++vring->last_used_idx;
+    if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
+        vring->signalled_used_valid = false;
+    }
+}
diff --git a/hw/dataplane/vring.h b/hw/dataplane/vring.h
new file mode 100644
index 0000000..3274f62
--- /dev/null
+++ b/hw/dataplane/vring.h
@@ -0,0 +1,62 @@
+/* Copyright 2012 Red Hat, Inc. and/or its affiliates
+ * Copyright IBM, Corp. 2012
+ *
+ * Based on Linux 2.6.39 vhost code:
+ * Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst at redhat.com>
+ *         Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/virtual/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef VRING_H
+#define VRING_H
+
+#include <linux/virtio_ring.h>
+#include "qemu-common.h"
+#include "hw/dataplane/hostmem.h"
+#include "hw/virtio.h"
+
+typedef struct {
+    HostMem hostmem;                /* guest memory mapper */
+    struct vring vr;                /* virtqueue vring mapped to host memory */
+    uint16_t last_avail_idx;        /* last processed avail ring index */
+    uint16_t last_used_idx;         /* last processed used ring index */
+    uint16_t signalled_used;        /* EVENT_IDX state */
+    bool signalled_used_valid;
+    bool broken;                    /* was there a fatal error? */
+} Vring;
+
+static inline unsigned int vring_get_num(Vring *vring)
+{
+    return vring->vr.num;
+}
+
+/* Are there more descriptors available? */
+static inline bool vring_more_avail(Vring *vring)
+{
+    return vring->vr.avail->idx != vring->last_avail_idx;
+}
+
+/* Fail future vring_pop() and vring_push() calls until reset */
+static inline void vring_set_broken(Vring *vring)
+{
+    vring->broken = true;
+}
+
+bool vring_setup(Vring *vring, VirtIODevice *vdev, int n);
+void vring_teardown(Vring *vring);
+void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
+bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
+bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
+int vring_pop(VirtIODevice *vdev, Vring *vring,
+              struct iovec iov[], struct iovec *iov_end,
+              unsigned int *out_num, unsigned int *in_num);
+void vring_push(Vring *vring, unsigned int head, int len);
+
+#endif /* VRING_H */
diff --git a/trace-events b/trace-events
index bb7621e..167d776 100644
--- a/trace-events
+++ b/trace-events
@@ -98,6 +98,9 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
 virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
 
+# hw/dataplane/vring.c
+vring_setup(uint64_t physical, void *desc, void *avail, void *used) "vring physical %#"PRIx64" desc %p avail %p used %p"
+
 # thread-pool.c
 thread_pool_submit(void *req, void *opaque) "req %p opaque %p"
 thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d"
commit 185ecf40e3589fc2717b0856ee1df05dd63a46dd
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Tue Nov 20 10:30:08 2012 +0100

    dataplane: add host memory mapping code
    
    The data plane thread needs to map guest physical addresses to host
    pointers.  Normally this is done with cpu_physical_memory_map() but the
    function assumes the global mutex is held.  The data plane thread does
    not touch the global mutex and therefore needs a thread-safe memory
    mapping mechanism.
    
    Hostmem registers a MemoryListener similar to how vhost collects and
    pushes memory region information into the kernel.  There is a
    fine-grained lock on the regions list which is held during lookup and
    when installing a new regions list.
    
    When the physical memory map changes the MemoryListener callbacks are
    invoked.  They build up a new list of memory regions which is finally
    installed when the list has been completed.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index d75f2f0..5ac4913 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y = usb/ ide/ pci/
+common-obj-y = usb/ ide/ pci/ dataplane/
 common-obj-y += loader.o
 common-obj-$(CONFIG_VIRTIO) += virtio-console.o
 common-obj-$(CONFIG_VIRTIO) += virtio-rng.o
diff --git a/hw/dataplane/Makefile.objs b/hw/dataplane/Makefile.objs
new file mode 100644
index 0000000..8c8dea1
--- /dev/null
+++ b/hw/dataplane/Makefile.objs
@@ -0,0 +1,3 @@
+ifeq ($(CONFIG_VIRTIO), y)
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o
+endif
diff --git a/hw/dataplane/hostmem.c b/hw/dataplane/hostmem.c
new file mode 100644
index 0000000..380537e
--- /dev/null
+++ b/hw/dataplane/hostmem.c
@@ -0,0 +1,176 @@
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "exec/address-spaces.h"
+#include "hostmem.h"
+
+static int hostmem_lookup_cmp(const void *phys_, const void *region_)
+{
+    hwaddr phys = *(const hwaddr *)phys_;
+    const HostMemRegion *region = region_;
+
+    if (phys < region->guest_addr) {
+        return -1;
+    } else if (phys >= region->guest_addr + region->size) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Map guest physical address to host pointer
+ */
+void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write)
+{
+    HostMemRegion *region;
+    void *host_addr = NULL;
+    hwaddr offset_within_region;
+
+    qemu_mutex_lock(&hostmem->current_regions_lock);
+    region = bsearch(&phys, hostmem->current_regions,
+                     hostmem->num_current_regions,
+                     sizeof(hostmem->current_regions[0]),
+                     hostmem_lookup_cmp);
+    if (!region) {
+        goto out;
+    }
+    if (is_write && region->readonly) {
+        goto out;
+    }
+    offset_within_region = phys - region->guest_addr;
+    if (len <= region->size - offset_within_region) {
+        host_addr = region->host_addr + offset_within_region;
+    }
+out:
+    qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+    return host_addr;
+}
+
+/**
+ * Install new regions list
+ */
+static void hostmem_listener_commit(MemoryListener *listener)
+{
+    HostMem *hostmem = container_of(listener, HostMem, listener);
+
+    qemu_mutex_lock(&hostmem->current_regions_lock);
+    g_free(hostmem->current_regions);
+    hostmem->current_regions = hostmem->new_regions;
+    hostmem->num_current_regions = hostmem->num_new_regions;
+    qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+    /* Reset new regions list */
+    hostmem->new_regions = NULL;
+    hostmem->num_new_regions = 0;
+}
+
+/**
+ * Add a MemoryRegionSection to the new regions list
+ */
+static void hostmem_append_new_region(HostMem *hostmem,
+                                      MemoryRegionSection *section)
+{
+    void *ram_ptr = memory_region_get_ram_ptr(section->mr);
+    size_t num = hostmem->num_new_regions;
+    size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
+
+    hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
+    hostmem->new_regions[num] = (HostMemRegion){
+        .host_addr = ram_ptr + section->offset_within_region,
+        .guest_addr = section->offset_within_address_space,
+        .size = section->size,
+        .readonly = section->readonly,
+    };
+    hostmem->num_new_regions++;
+}
+
+static void hostmem_listener_append_region(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+    HostMem *hostmem = container_of(listener, HostMem, listener);
+
+    /* Ignore non-RAM regions, we may not be able to map them */
+    if (!memory_region_is_ram(section->mr)) {
+        return;
+    }
+
+    /* Ignore regions with dirty logging, we cannot mark them dirty */
+    if (memory_region_is_logging(section->mr)) {
+        return;
+    }
+
+    hostmem_append_new_region(hostmem, section);
+}
+
+/* We don't implement most MemoryListener callbacks, use these nop stubs */
+static void hostmem_listener_dummy(MemoryListener *listener)
+{
+}
+
+static void hostmem_listener_section_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section)
+{
+}
+
+static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
+                                           MemoryRegionSection *section,
+                                           bool match_data, uint64_t data,
+                                           EventNotifier *e)
+{
+}
+
+static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
+                                                  MemoryRegionSection *section,
+                                                  hwaddr addr, hwaddr len)
+{
+}
+
+void hostmem_init(HostMem *hostmem)
+{
+    memset(hostmem, 0, sizeof(*hostmem));
+
+    qemu_mutex_init(&hostmem->current_regions_lock);
+
+    hostmem->listener = (MemoryListener){
+        .begin = hostmem_listener_dummy,
+        .commit = hostmem_listener_commit,
+        .region_add = hostmem_listener_append_region,
+        .region_del = hostmem_listener_section_dummy,
+        .region_nop = hostmem_listener_append_region,
+        .log_start = hostmem_listener_section_dummy,
+        .log_stop = hostmem_listener_section_dummy,
+        .log_sync = hostmem_listener_section_dummy,
+        .log_global_start = hostmem_listener_dummy,
+        .log_global_stop = hostmem_listener_dummy,
+        .eventfd_add = hostmem_listener_eventfd_dummy,
+        .eventfd_del = hostmem_listener_eventfd_dummy,
+        .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
+        .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
+        .priority = 10,
+    };
+
+    memory_listener_register(&hostmem->listener, &address_space_memory);
+    if (hostmem->num_new_regions > 0) {
+        hostmem_listener_commit(&hostmem->listener);
+    }
+}
+
+void hostmem_finalize(HostMem *hostmem)
+{
+    memory_listener_unregister(&hostmem->listener);
+    g_free(hostmem->new_regions);
+    g_free(hostmem->current_regions);
+    qemu_mutex_destroy(&hostmem->current_regions_lock);
+}
diff --git a/hw/dataplane/hostmem.h b/hw/dataplane/hostmem.h
new file mode 100644
index 0000000..b2cf093
--- /dev/null
+++ b/hw/dataplane/hostmem.h
@@ -0,0 +1,57 @@
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *   Stefan Hajnoczi <stefanha at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HOSTMEM_H
+#define HOSTMEM_H
+
+#include "exec/memory.h"
+#include "qemu/thread.h"
+
+typedef struct {
+    void *host_addr;
+    hwaddr guest_addr;
+    uint64_t size;
+    bool readonly;
+} HostMemRegion;
+
+typedef struct {
+    /* The listener is invoked when regions change and a new list of regions is
+     * built up completely before they are installed.
+     */
+    MemoryListener listener;
+    HostMemRegion *new_regions;
+    size_t num_new_regions;
+
+    /* Current regions are accessed from multiple threads either to lookup
+     * addresses or to install a new list of regions.  The lock protects the
+     * pointer and the regions.
+     */
+    QemuMutex current_regions_lock;
+    HostMemRegion *current_regions;
+    size_t num_current_regions;
+} HostMem;
+
+void hostmem_init(HostMem *hostmem);
+void hostmem_finalize(HostMem *hostmem);
+
+/**
+ * Map a guest physical address to a pointer
+ *
+ * Note that there is no map/unmap mechanism here.  The caller must ensure that
+ * mapped memory is no longer used across events like hot memory unplug.  This
+ * can be done with other mechanisms like bdrv_drain_all() that quiesce
+ * in-flight I/O.
+ */
+void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write);
+
+#endif /* HOSTMEM_H */
commit 583f6e7bbd24b31f7eecd5c21ba0a5a5a77f52f1
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 15:04:15 2012 +0100

    configure: add CONFIG_VIRTIO_BLK_DATA_PLANE
    
    The virtio-blk-data-plane feature only works with Linux AIO.  Therefore
    add a ./configure option and necessary checks to implement this
    dependency.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/configure b/configure
index b0c7e54..cc1e20a 100755
--- a/configure
+++ b/configure
@@ -223,6 +223,7 @@ libiscsi=""
 coroutine=""
 seccomp=""
 glusterfs=""
+virtio_blk_data_plane=""
 
 # parse CC options first
 for opt do
@@ -882,6 +883,10 @@ for opt do
   ;;
   --enable-glusterfs) glusterfs="yes"
   ;;
+  --disable-virtio-blk-data-plane) virtio_blk_data_plane="no"
+  ;;
+  --enable-virtio-blk-data-plane) virtio_blk_data_plane="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -2274,6 +2279,17 @@ EOF
 fi
 
 ##########################################
+# adjust virtio-blk-data-plane based on linux-aio
+
+if test "$virtio_blk_data_plane" = "yes" -a \
+	"$linux_aio" != "yes" ; then
+  echo "Error: virtio-blk-data-plane requires Linux AIO, please try --enable-linux-aio"
+  exit 1
+elif test -z "$virtio_blk_data_plane" ; then
+  virtio_blk_data_plane=$linux_aio
+fi
+
+##########################################
 # attr probe
 
 if test "$attr" != "no" ; then
@@ -3289,6 +3305,7 @@ echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine_backend"
 echo "GlusterFS support $glusterfs"
+echo "virtio-blk-data-plane $virtio_blk_data_plane"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3634,6 +3651,10 @@ if test "$glusterfs" = "yes" ; then
   echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
 fi
 
+if test "$virtio_blk_data_plane" = "yes" ; then
+  echo "CONFIG_VIRTIO_BLK_DATA_PLANE=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
commit 4065742ac0f3c84abdd8d718b44a88f3ac56015a
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Nov 14 11:43:23 2012 +0100

    raw-posix: add raw_get_aio_fd() for virtio-blk-data-plane
    
    The raw_get_aio_fd() function allows virtio-blk-data-plane to get the
    file descriptor of a raw image file with Linux AIO enabled.  This
    interface is really a layering violation that can be resolved once the
    block layer is able to run outside the global mutex - at that point
    virtio-blk-data-plane will switch from custom Linux AIO code to using
    the block layer.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 91159c7..87d888e 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1776,6 +1776,40 @@ static BlockDriver bdrv_host_cdrom = {
 };
 #endif /* __FreeBSD__ */
 
+#ifdef CONFIG_LINUX_AIO
+/**
+ * Return the file descriptor for Linux AIO
+ *
+ * This function is a layering violation and should be removed when it becomes
+ * possible to call the block layer outside the global mutex.  It allows the
+ * caller to hijack the file descriptor so I/O can be performed outside the
+ * block layer.
+ */
+int raw_get_aio_fd(BlockDriverState *bs)
+{
+    BDRVRawState *s;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (bs->drv == bdrv_find_format("raw")) {
+        bs = bs->file;
+    }
+
+    /* raw-posix has several protocols so just check for raw_aio_readv */
+    if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
+        return -ENOTSUP;
+    }
+
+    s = bs->opaque;
+    if (!s->use_aio) {
+        return -ENOTSUP;
+    }
+    return s->fd;
+}
+#endif /* CONFIG_LINUX_AIO */
+
 static void bdrv_file_init(void)
 {
     /*
diff --git a/include/block/block.h b/include/block/block.h
index b81d200..0719339 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -365,6 +365,15 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
 void bdrv_set_in_use(BlockDriverState *bs, int in_use);
 int bdrv_in_use(BlockDriverState *bs);
 
+#ifdef CONFIG_LINUX_AIO
+int raw_get_aio_fd(BlockDriverState *bs);
+#else
+static inline int raw_get_aio_fd(BlockDriverState *bs)
+{
+    return -ENOTSUP;
+}
+#endif
+
 enum BlockAcctType {
     BDRV_ACCT_READ,
     BDRV_ACCT_WRITE,
commit 34daffa04886444dafd4a6951167225e824003d0
Merge: 079944e 0a2a59d
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 2 08:01:54 2013 -0600

    Merge remote-tracking branch 'qemu-kvm/uq/master' into staging
    
    * qemu-kvm/uq/master:
      qemu-kvm/pci-assign: 64 bits bar emulation
      target-i386: Enabling IA32_TSC_ADJUST for QEMU KVM guest VMs
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 079944e695589364d19de31f27761c6bdea1c207
Merge: a97ff30 89d62be
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 2 08:01:36 2013 -0600

    Merge remote-tracking branch 'mst/tags/for_anthony' into staging
    
    pci,virtio
    
    This optimizes MSIX handling in virtio-pci.
    Also included is pci express capability bugfix.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>
    
    * mst/tags/for_anthony:
      virtio-pci: don't poll masked vectors
      msix: expose access to masked/pending state
      msi: add API to get notified about pending bit poll
      pcie: Fix bug in pcie_ext_cap_set_next
      virtio: make bindings typesafe

diff --cc hw/s390-virtio-bus.c
index b78d626,84fba96..7e99175
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@@ -364,18 -363,33 +364,32 @@@ VirtIOS390Device *s390_virtio_bus_find_
      return NULL;
  }
  
- static void virtio_s390_notify(void *opaque, uint16_t vector)
+ /* DeviceState to VirtIOS390Device. Note: used on datapath,
+  * be careful and test performance if you change this.
+  */
+ static inline VirtIOS390Device *to_virtio_s390_device_fast(DeviceState *d)
+ {
+     return container_of(d, VirtIOS390Device, qdev);
+ }
+ 
+ /* DeviceState to VirtIOS390Device. TODO: use QOM. */
+ static inline VirtIOS390Device *to_virtio_s390_device(DeviceState *d)
+ {
+     return container_of(d, VirtIOS390Device, qdev);
+ }
+ 
+ static void virtio_s390_notify(DeviceState *d, uint16_t vector)
  {
-     VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
+     VirtIOS390Device *dev = to_virtio_s390_device_fast(d);
      uint64_t token = s390_virtio_device_vq_token(dev, vector);
      S390CPU *cpu = s390_cpu_addr2state(0);
 -    CPUS390XState *env = &cpu->env;
  
 -    s390_virtio_irq(env, 0, token);
 +    s390_virtio_irq(cpu, 0, token);
  }
  
- static unsigned virtio_s390_get_features(void *opaque)
+ static unsigned virtio_s390_get_features(DeviceState *d)
  {
-     VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
+     VirtIOS390Device *dev = to_virtio_s390_device(d);
      return dev->host_features;
  }
  
commit a97ff30e934c4d673122cf709e8e87f0effbe2f7
Merge: 5928023 15faf94
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 2 08:01:22 2013 -0600

    Merge remote-tracking branch 'kraxel/seabios-a810e4e' into staging
    
    * kraxel/seabios-a810e4e:
      Update seabios to a810e4e72a0d42c7bc04eda57382f8e019add901
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 5928023cef87847a295035487397b9ec701fdd6b
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sat Nov 24 23:03:13 2012 +0100

    pflash_cfi01: Suppress warning when Linux probes for AMD flash
    
    There are several ARM and MIPS boards which are manufactured with
    either Intel (pflash_cfi01.c) or AMD (pflash_cfi02.c) flash memory.
    
    The Linux kernel supports both and first probes for AMD flash which
    resulted in one or two warnings from the Intel flash emulation:
    
    pflash_write: Unimplemented flash cmd sequence (offset 0000000000000000, wcycle 0x0 cmd 0x0 value 0xf000f0)
    pflash_write: Unimplemented flash cmd sequence (offset 0000000000000000, wcycle 0x0 cmd 0x0 value 0xf0)
    
    These warnings confuse users, so suppress them.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
index 95e07e7..aadedef 100644
--- a/hw/pflash_cfi01.c
+++ b/hw/pflash_cfi01.c
@@ -319,6 +319,9 @@ static void pflash_write(pflash_t *pfl, hwaddr offset,
             DPRINTF("%s: Write to buffer\n", __func__);
             pfl->status |= 0x80; /* Ready! */
             break;
+        case 0xf0: /* Probe for AMD flash */
+            DPRINTF("%s: Probe for AMD flash\n", __func__);
+            goto reset_flash;
         case 0xff: /* Read array mode */
             DPRINTF("%s: Read array mode\n", __func__);
             goto reset_flash;
commit 0f0b93980572726e69e32ff13e2d7fb72b936157
Author: 陳韋任 (Wei-Ren Chen) <chenwj at iis.sinica.edu.tw>
Date:   Tue Dec 11 00:15:55 2012 +0800

    target-mips: Use EXCP_SC rather than a magic number
    
      From the discussion on the ML [1], the exception limit defined by
    magic number 0x100 is actually EXCP_SC defined in cpu.h. Replace the
    magic number with EXCP_SC. Remove "#if 1 .. #endif" as well.
    
    [1] http://lists.gnu.org/archive/html/qemu-devel/2012-11/msg03080.html
    
    Signed-off-by: Chen Wei-Ren <chenwj at iis.sinica.edu.tw>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index e3ab05c..d833d78 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -38,10 +38,9 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
                                                         int error_code,
                                                         uintptr_t pc)
 {
-#if 1
-    if (exception < 0x100)
+    if (exception < EXCP_SC) {
         qemu_log("%s: %d %d\n", __func__, exception, error_code);
-#endif
+    }
     env->exception_index = exception;
     env->error_code = error_code;
 
commit c4aaba92e516ad061dff7ac2ae3c2b2b7058c404
Author: Jovanovic, Petar <petarj at mips.com>
Date:   Tue Dec 11 15:06:35 2012 +0000

    target-mips: Make repl_ph to sign extend to target-long
    
    The immediate value is 9bits, should sign-extend to 16bits. The return value to
    register should sign-extend to target_long, as Richard says, removing an
    unnecessary cast works fine.
    
    Signed-off-by: Dongxue Zhang <elta.era at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/translate.c b/target-mips/translate.c
index e81ff38..6281e70 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -13769,9 +13769,10 @@ static void gen_mipsdsp_bitinsn(CPUMIPSState *env, DisasContext *ctx,
             check_dsp(ctx);
             {
                 imm = (ctx->opcode >> 16) & 0x03FF;
+                imm = (int16_t)(imm << 6) >> 6;
                 tcg_gen_movi_tl(cpu_gpr[ret], \
                                 (target_long)((int32_t)imm << 16 | \
-                                (uint32_t)(uint16_t)imm));
+                                (uint16_t)imm));
             }
             break;
         case OPC_REPLV_PH:
commit fe65a1fad6aa140769ffda31c34a109f7d2df101
Author: Dongxue Zhang <elta.era at gmail.com>
Date:   Tue Dec 11 22:28:28 2012 +0800

    Fix my email address
    
    Fix my email address, last time it was wrong.
    
    Signed-off-by: Dongxue Zhang <elta.era at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index ee0c872..a33e2bf 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -2,7 +2,7 @@
  * MIPS ASE DSP Instruction emulation helpers for QEMU.
  *
  * Copyright (c) 2012  Jia Liu <proljc at gmail.com>
- *                     Dongxue Zhang <elat.era at gmail.com>
+ *                     Dongxue Zhang <elta.era at gmail.com>
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
commit 79eb8392db19a916f6a3277f7cd36fb22c2bdbaf
Author: Stefan Weil <sw at weilnetz.de>
Date:   Sat Dec 22 13:38:19 2012 +0100

    target-mips: Remove semicolon from macro definition
    
    Macro RESTORE_FLUSH_MODE is similar to RESTORE_ROUNDING_MODE
    but included a semicolon.
    
    The code which uses that macro also includes a semicolon,
    so the result was an empty statement.
    
    Remove the superfluous semicolon from the macro definition.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index e85edce..e3ab05c 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -2175,7 +2175,7 @@ static unsigned int ieee_rm[] = {
     set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3], &env->active_fpu.fp_status)
 
 #define RESTORE_FLUSH_MODE \
-    set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0, &env->active_fpu.fp_status);
+    set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0, &env->active_fpu.fp_status)
 
 target_ulong helper_cfc1(CPUMIPSState *env, uint32_t reg)
 {
commit b8abbbe8df5e04085f4b85fc4f7cf85efbcd492c
Author: Petar Jovanovic <petarj at mips.com>
Date:   Mon Dec 10 16:28:17 2012 +0100

    target-mips: Fix for helpers for EXTR_* instructions
    
    The change removes some unnecessary and incorrect code for EXTR_S.H.
    Further, it corrects the mask for shift value in the EXTR_ instructions. It also
    extends the existing tests so they trigger the issues corrected with the change.
    
    Signed-off-by: Petar Jovanovic <petarj at mips.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index acf7ceb..ee0c872 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -484,35 +484,6 @@ static inline uint8_t mipsdsp_rrshift1_sub_u8(uint8_t a, uint8_t b)
     return (temp >> 1) & 0x00FF;
 }
 
-static inline int64_t mipsdsp_rashift_short_acc(int32_t ac,
-                                                int32_t shift,
-                                                CPUMIPSState *env)
-{
-    int32_t sign, temp31;
-    int64_t temp, acc;
-
-    sign = (env->active_tc.HI[ac] >> 31) & 0x01;
-    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
-          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
-    if (shift == 0) {
-        temp = acc;
-    } else {
-        if (sign == 0) {
-            temp = (((int64_t)0x01 << (32 - shift + 1)) - 1) & (acc >> shift);
-        } else {
-            temp = ((((int64_t)0x01 << (shift + 1)) - 1) << (32 - shift)) |
-                   (acc >> shift);
-        }
-    }
-
-    temp31 = (temp >> 31) & 0x01;
-    if (sign != temp31) {
-        set_DSPControl_overflow_flag(1, 23, env);
-    }
-
-    return temp;
-}
-
 /*  128 bits long. p[0] is LO, p[1] is HI. */
 static inline void mipsdsp_rndrashift_short_acc(int64_t *p,
                                                 int32_t ac,
@@ -3407,7 +3378,7 @@ target_ulong helper_extr_w(target_ulong ac, target_ulong shift,
     int32_t tempI;
     int64_t tempDL[2];
 
-    shift = shift & 0x0F;
+    shift = shift & 0x1F;
 
     mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
     if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
@@ -3435,7 +3406,7 @@ target_ulong helper_extr_r_w(target_ulong ac, target_ulong shift,
 {
     int64_t tempDL[2];
 
-    shift = shift & 0x0F;
+    shift = shift & 0x1F;
 
     mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
     if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
@@ -3462,7 +3433,7 @@ target_ulong helper_extr_rs_w(target_ulong ac, target_ulong shift,
     int32_t tempI, temp64;
     int64_t tempDL[2];
 
-    shift = shift & 0x0F;
+    shift = shift & 0x1F;
 
     mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
     if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
@@ -3645,11 +3616,15 @@ target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
 target_ulong helper_extr_s_h(target_ulong ac, target_ulong shift,
                              CPUMIPSState *env)
 {
-    int64_t temp;
+    int64_t temp, acc;
+
+    shift = shift & 0x1F;
+
+    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
+          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
 
-    shift = shift & 0x0F;
+    temp = acc >> shift;
 
-    temp = mipsdsp_rashift_short_acc(ac, shift, env);
     if (temp > (int64_t)0x7FFF) {
         temp = 0x00007FFF;
         set_DSPControl_overflow_flag(1, 23, env);
diff --git a/tests/tcg/mips/mips32-dsp/extr_r_w.c b/tests/tcg/mips/mips32-dsp/extr_r_w.c
index 0beeefd..02e0224 100644
--- a/tests/tcg/mips/mips32-dsp/extr_r_w.c
+++ b/tests/tcg/mips/mips32-dsp/extr_r_w.c
@@ -44,5 +44,28 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("mthi %2, $ac1\n\t"
+         "mtlo %3, $ac1\n\t"
+         "extr_r.w %0, $ac1, 0x1F\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "=r"(dsp)
+         : "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extr_rs_w.c b/tests/tcg/mips/mips32-dsp/extr_rs_w.c
index 24c748d..c3a22ee 100644
--- a/tests/tcg/mips/mips32-dsp/extr_rs_w.c
+++ b/tests/tcg/mips/mips32-dsp/extr_rs_w.c
@@ -44,5 +44,28 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("mthi %2, $ac1\n\t"
+         "mtlo %3, $ac1\n\t"
+         "extr_rs.w %0, $ac1, 0x1F\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "=r"(dsp)
+         : "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extr_s_h.c b/tests/tcg/mips/mips32-dsp/extr_s_h.c
index b212913..9bc2a63 100644
--- a/tests/tcg/mips/mips32-dsp/extr_s_h.c
+++ b/tests/tcg/mips/mips32-dsp/extr_s_h.c
@@ -59,5 +59,28 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dsp */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    ach = 0x123;
+    acl = 0x87654321;
+    result = 0x1238;
+    __asm
+        ("mthi %2, $ac1\n\t"
+         "mtlo %3, $ac1\n\t"
+         "extr_s.h %0, $ac1, 28\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "=r"(dsp)
+         : "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extr_w.c b/tests/tcg/mips/mips32-dsp/extr_w.c
index 02ab9ec..bd6b0b9 100644
--- a/tests/tcg/mips/mips32-dsp/extr_w.c
+++ b/tests/tcg/mips/mips32-dsp/extr_w.c
@@ -44,5 +44,28 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("mthi %2, $ac1\n\t"
+         "mtlo %3, $ac1\n\t"
+         "extr.w %0, $ac1, 0x1F\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "=r"(dsp)
+         : "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extrv_r_w.c b/tests/tcg/mips/mips32-dsp/extrv_r_w.c
index 005807b..2403b3a 100644
--- a/tests/tcg/mips/mips32-dsp/extrv_r_w.c
+++ b/tests/tcg/mips/mips32-dsp/extrv_r_w.c
@@ -50,5 +50,30 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    rs = 31;
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("wrdsp %1, 0x01\n\t"
+         "mthi %3, $ac1\n\t"
+         "mtlo %4, $ac1\n\t"
+         "extrv_r.w %0, $ac1, %2\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "+r"(dsp)
+         : "r"(rs), "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extrv_rs_w.c b/tests/tcg/mips/mips32-dsp/extrv_rs_w.c
index c2d8513..ccceeb9 100644
--- a/tests/tcg/mips/mips32-dsp/extrv_rs_w.c
+++ b/tests/tcg/mips/mips32-dsp/extrv_rs_w.c
@@ -48,5 +48,30 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    rs = 0x1F;
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("wrdsp %1, 0x01\n\t"
+         "mthi %3, $ac1\n\t"
+         "mtlo %4, $ac1\n\t"
+         "extrv_rs.w %0, $ac1, %2\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "+r"(dsp)
+         : "r"(rs), "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extrv_s_h.c b/tests/tcg/mips/mips32-dsp/extrv_s_h.c
index 8c13b5e..feac3e2 100644
--- a/tests/tcg/mips/mips32-dsp/extrv_s_h.c
+++ b/tests/tcg/mips/mips32-dsp/extrv_s_h.c
@@ -67,5 +67,22 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    rs = 0x1C;
+    ach = 0x123;
+    acl = 0x87654321;
+    result = 0x1238;
+    __asm
+        ("wrdsp %1, 0x01\n\t"
+         "mthi %3, $ac1\n\t"
+         "mtlo %4, $ac1\n\t"
+         "extrv_s.h %0, $ac1, %2\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "+r"(dsp)
+         : "r"(rs), "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/extrv_w.c b/tests/tcg/mips/mips32-dsp/extrv_w.c
index 9cb493d..9e8b238 100644
--- a/tests/tcg/mips/mips32-dsp/extrv_w.c
+++ b/tests/tcg/mips/mips32-dsp/extrv_w.c
@@ -50,5 +50,31 @@ int main()
     assert(dsp == 0);
     assert(result == rt);
 
+    /* Clear dspcontrol */
+    dsp = 0;
+    __asm
+        ("wrdsp %0\n\t"
+         :
+         : "r"(dsp)
+        );
+
+    rs = 31;
+    ach = 0x3fffffff;
+    acl = 0x2bcdef01;
+    result = 0x7ffffffe;
+    __asm
+        ("wrdsp %1, 0x01\n\t"
+         "mthi %3, $ac1\n\t"
+         "mtlo %4, $ac1\n\t"
+         "extrv.w %0, $ac1, %2\n\t"
+         "rddsp %1\n\t"
+         : "=r"(rt), "+r"(dsp)
+         : "r"(rs), "r"(ach), "r"(acl)
+        );
+    dsp = (dsp >> 23) & 0x01;
+    assert(dsp == 0);
+    assert(result == rt);
+
+
     return 0;
 }
commit eec8972a5bc744eda695a86a984d746c240dff90
Author: Petar Jovanovic <petarj at mips.com>
Date:   Thu Dec 6 20:30:35 2012 +0100

    target-mips: Fix incorrect reads and writes to DSPControl register
    
    Upper 4 bits of ccond (bits 31..28 ) of DSPControl register are not used in
    the MIPS32 architecture. They are used in the MIPS64 architecture. For MIPS32
    these bits must be written as zero, and return zero on read.
    
    The change fixes writes (WRDSP) and reads (RDDSP) to the register. It also fixes
    the tests that use these instructions, and makes them smaller and simpler.
    
    Signed-off-by: Petar Jovanovic <petarj at mips.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
index 14daf91..acf7ceb 100644
--- a/target-mips/dsp_helper.c
+++ b/target-mips/dsp_helper.c
@@ -3948,7 +3948,11 @@ void helper_wrdsp(target_ulong rs, target_ulong mask_num, CPUMIPSState *env)
     if (mask[4] == 1) {
         overwrite &= 0x00FFFFFF;
         newbits   &= 0x00FFFFFF;
+#if defined(TARGET_MIPS64)
         newbits   |= 0xFF000000 & rs;
+#else
+        newbits   |= 0x0F000000 & rs;
+#endif
     }
 
     if (mask[5] == 1) {
@@ -3999,7 +4003,11 @@ target_ulong helper_rddsp(target_ulong masknum, CPUMIPSState *env)
     }
 
     if (mask[4] == 1) {
+#if defined(TARGET_MIPS64)
         temp |= dsp & 0xFF000000;
+#else
+        temp |= dsp & 0x0F000000;
+#endif
     }
 
     if (mask[5] == 1) {
diff --git a/tests/tcg/mips/mips32-dsp/rddsp.c b/tests/tcg/mips/mips32-dsp/rddsp.c
index e8948ec..2f30285 100644
--- a/tests/tcg/mips/mips32-dsp/rddsp.c
+++ b/tests/tcg/mips/mips32-dsp/rddsp.c
@@ -6,14 +6,13 @@ int main()
     int dsp_i, dsp_o;
     int ccond_i, outflag_i, efi_i, c_i, scount_i, pos_i;
     int ccond_o, outflag_o, efi_o, c_o, scount_o, pos_o;
-    int ccond_r, outflag_r, efi_r, c_r, scount_r, pos_r;
 
-    ccond_i   = 0x000000BC;/* 4 */
-    outflag_i = 0x0000001B;/* 3 */
-    efi_i     = 0x00000001;/* 5 */
-    c_i       = 0x00000001;/* 2 */
-    scount_i  = 0x0000000F;/* 1 */
-    pos_i     = 0x0000000C;/* 0 */
+    ccond_i   = 0x0000000C;  /* 4 */
+    outflag_i = 0x0000001B;  /* 3 */
+    efi_i     = 0x00000001;  /* 5 */
+    c_i       = 0x00000001;  /* 2 */
+    scount_i  = 0x0000000F;  /* 1 */
+    pos_i     = 0x0000000C;  /* 0 */
 
     dsp_i = (ccond_i   << 24) | \
             (outflag_i << 16) | \
@@ -22,13 +21,6 @@ int main()
             (scount_i  <<  7) | \
             pos_i;
 
-    ccond_r   = ccond_i;
-    outflag_r = outflag_i;
-    efi_r     = efi_i;
-    c_r       = c_i;
-    scount_r  = scount_i;
-    pos_r     = pos_i;
-
     __asm
         ("wrdsp %1, 0x3F\n\t"
          "rddsp %0, 0x3F\n\t"
@@ -43,12 +35,12 @@ int main()
     scount_o  = (dsp_o >>  7) & 0x3F;
     pos_o     =  dsp_o & 0x1F;
 
-    assert(ccond_o   == ccond_r);
-    assert(outflag_o == outflag_r);
-    assert(efi_o     == efi_r);
-    assert(c_o       == c_r);
-    assert(scount_o  == scount_r);
-    assert(pos_o     == pos_r);
+    assert(ccond_o   == ccond_i);
+    assert(outflag_o == outflag_i);
+    assert(efi_o     == efi_i);
+    assert(c_o       == c_i);
+    assert(scount_o  == scount_i);
+    assert(pos_o     == pos_i);
 
     return 0;
 }
diff --git a/tests/tcg/mips/mips32-dsp/wrdsp.c b/tests/tcg/mips/mips32-dsp/wrdsp.c
index e8948ec..dc54943 100644
--- a/tests/tcg/mips/mips32-dsp/wrdsp.c
+++ b/tests/tcg/mips/mips32-dsp/wrdsp.c
@@ -6,14 +6,13 @@ int main()
     int dsp_i, dsp_o;
     int ccond_i, outflag_i, efi_i, c_i, scount_i, pos_i;
     int ccond_o, outflag_o, efi_o, c_o, scount_o, pos_o;
-    int ccond_r, outflag_r, efi_r, c_r, scount_r, pos_r;
 
-    ccond_i   = 0x000000BC;/* 4 */
-    outflag_i = 0x0000001B;/* 3 */
-    efi_i     = 0x00000001;/* 5 */
-    c_i       = 0x00000001;/* 2 */
-    scount_i  = 0x0000000F;/* 1 */
-    pos_i     = 0x0000000C;/* 0 */
+    ccond_i   = 0x000000BC;  /* 4 */
+    outflag_i = 0x0000001B;  /* 3 */
+    efi_i     = 0x00000001;  /* 5 */
+    c_i       = 0x00000001;  /* 2 */
+    scount_i  = 0x0000000F;  /* 1 */
+    pos_i     = 0x0000000C;  /* 0 */
 
     dsp_i = (ccond_i   << 24) | \
             (outflag_i << 16) | \
@@ -22,13 +21,6 @@ int main()
             (scount_i  <<  7) | \
             pos_i;
 
-    ccond_r   = ccond_i;
-    outflag_r = outflag_i;
-    efi_r     = efi_i;
-    c_r       = c_i;
-    scount_r  = scount_i;
-    pos_r     = pos_i;
-
     __asm
         ("wrdsp %1, 0x3F\n\t"
          "rddsp %0, 0x3F\n\t"
@@ -43,12 +35,12 @@ int main()
     scount_o  = (dsp_o >>  7) & 0x3F;
     pos_o     =  dsp_o & 0x1F;
 
-    assert(ccond_o   == ccond_r);
-    assert(outflag_o == outflag_r);
-    assert(efi_o     == efi_r);
-    assert(c_o       == c_r);
-    assert(scount_o  == scount_r);
-    assert(pos_o     == pos_r);
+    assert(ccond_o   == (ccond_i & 0x0F));
+    assert(outflag_o == outflag_i);
+    assert(efi_o     == efi_i);
+    assert(c_o       == c_i);
+    assert(scount_o  == scount_i);
+    assert(pos_o     == pos_i);
 
     return 0;
 }
commit a795ef8dcb8cbadffc996c41ff38927a97645234
Author: Brad Smith <brad at comstyle.com>
Date:   Fri Dec 28 01:00:26 2012 -0500

    Fix semaphores fallback code
    
    As reported in bug 1087114 the semaphores fallback code is broken which
    results in QEMU crashing and making QEMU unusable.
    
    This patch is from Paolo.
    
    This needs to be back ported to the 1.3 stable tree as well.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Brad Smith <brad at comstyle.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c
index 6374df3..4489abf 100644
--- a/qemu-thread-posix.c
+++ b/qemu-thread-posix.c
@@ -213,6 +213,7 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
     while (sem->count < 0) {
         rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
         if (rc == ETIMEDOUT) {
+            ++sem->count;
             break;
         }
         if (rc != 0) {
commit 753d99d38b5877440dde2705e30ca60e2ec62965
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Dec 28 14:19:35 2012 -0800

    tcg-hppa: Fix typo in brcond2
    
    Reported-by: Stuart Brady <sdb at zubnet.me.uk>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 5b27cf6..656e736 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -824,7 +824,7 @@ static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
         tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index);
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, al, bl, bhconst, label_index);
+        tcg_out_brcond(s, TCG_COND_NE, al, bl, blconst, label_index);
         tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
         break;
     default:
commit 76a347e1cd0c2d6959461c89dda15ef5c4140da6
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Dec 28 14:17:02 2012 -0800

    tcg-i386: Perform cmov detection at runtime for 32-bit.
    
    Existing compile-time detection is spotty at best.  Convert
    it all to runtime detection instead.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/configure b/configure
index 99c1ec3..b0c7e54 100755
--- a/configure
+++ b/configure
@@ -3086,6 +3086,21 @@ if compile_prog "" "" ; then
     has_environ=yes
 fi
 
+########################################
+# check if cpuid.h is usable.
+
+cpuid_h=no
+cat > $TMPC << EOF
+#include <cpuid.h>
+int main(void) {
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    cpuid_h=yes
+fi
+
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -3611,6 +3626,10 @@ if test "$has_environ" = "yes" ; then
   echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi
 
+if test "$cpuid_h" = "yes" ; then
+  echo "CONFIG_CPUID_H=y" >> $config_host_mak
+fi
+
 if test "$glusterfs" = "yes" ; then
   echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
 fi
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index ae82746..e083874 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -97,6 +97,18 @@ static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+/* For 32-bit, we are going to attempt to determine at runtime whether cmov
+   is available.  However, the host compiler must supply <cpuid.h>, as we're
+   not going to go so far as our own inline assembly.  */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov 1
+#elif defined(CONFIG_CPUID_H)
+#include <cpuid.h>
+static bool have_cmov;
+#else
+# define have_cmov 0
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -943,7 +955,14 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg v1)
 {
     tcg_out_cmp(s, c1, c2, const_c2, 0);
-    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    if (have_cmov) {
+        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    } else {
+        int over = gen_new_label();
+        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+        tcg_out_label(s, over, s->code_ptr);
+    }
 }
 
 #if TCG_TARGET_REG_BITS == 64
@@ -2243,6 +2262,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
+    /* For 32-bit, 99% certainty that we're running on hardware that supports
+       cmov, but we still need to check.  In case cmov is not available, we'll
+       use a small forward branch.  */
+#ifndef have_cmov
+    {
+        unsigned a, b, c, d;
+        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+    }
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
     /* fail safe */
     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 5352ac0..e63db9c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -91,12 +91,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
-#if defined(__x86_64__) || defined(__i686__)
-/* Use cmov only if the compiler is already doing so.  */
 #define TCG_TARGET_HAS_movcond_i32      1
-#else
-#define TCG_TARGET_HAS_movcond_i32      0
-#endif
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
commit afcb92beac9e477e5ae5c36bf38830e225e2235f
Author: Richard Henderson <rth at twiddle.net>
Date:   Fri Dec 7 15:07:17 2012 -0600

    tcg: Add TCGV_IS_UNUSED_*
    
    Cc: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 0b3cb0b..91c9d80 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -2329,6 +2329,7 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
 #define tcg_gen_qemu_ldst_op tcg_gen_op3i_i32
 #define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i32
 #define TCGV_UNUSED(x) TCGV_UNUSED_I32(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I32(x)
 #define TCGV_EQUAL(a, b) TCGV_EQUAL_I32(a, b)
 #else
 #define TCGv TCGv_i64
@@ -2340,6 +2341,7 @@ static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
 #define tcg_gen_qemu_ldst_op tcg_gen_op3i_i64
 #define tcg_gen_qemu_ldst_op_i64 tcg_gen_qemu_ldst_op_i64_i64
 #define TCGV_UNUSED(x) TCGV_UNUSED_I64(x)
+#define TCGV_IS_UNUSED(x) TCGV_IS_UNUSED_I64(x)
 #define TCGV_EQUAL(a, b) TCGV_EQUAL_I64(a, b)
 #endif
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index b2e2a25..a427972 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -270,6 +270,9 @@ typedef int TCGv_i64;
 #define TCGV_UNUSED_I32(x) x = MAKE_TCGV_I32(-1)
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
+#define TCGV_IS_UNUSED_I32(x) (GET_TCGV_I32(x) == -1)
+#define TCGV_IS_UNUSED_I64(x) (GET_TCGV_I64(x) == -1)
+
 /* call flags */
 /* Helper does not read globals (either directly or through an exception). It
    implies TCG_CALL_NO_WRITE_GLOBALS. */
commit 927fa909d5d5cf8c07673cd16a6d3bdc81250bc0
Author: Brad Smith <brad at comstyle.com>
Date:   Fri Dec 28 01:38:11 2012 -0500

    Disable semaphores fallback code for OpenBSD
    
    Disable the semaphores fallback code for OpenBSD as modern OpenBSD
    releases now have sem_timedwait().
    
    Signed-off-by: Brad Smith <brad at comstyle.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index 380bae2..0f30dcc 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -12,7 +12,7 @@ struct QemuCond {
 };
 
 struct QemuSemaphore {
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     pthread_mutex_t lock;
     pthread_cond_t cond;
     int count;
diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c
index 7be292e..6374df3 100644
--- a/qemu-thread-posix.c
+++ b/qemu-thread-posix.c
@@ -122,7 +122,7 @@ void qemu_sem_init(QemuSemaphore *sem, int init)
 {
     int rc;
 
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     rc = pthread_mutex_init(&sem->lock, NULL);
     if (rc != 0) {
         error_exit(rc, __func__);
@@ -147,7 +147,7 @@ void qemu_sem_destroy(QemuSemaphore *sem)
 {
     int rc;
 
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     rc = pthread_cond_destroy(&sem->cond);
     if (rc < 0) {
         error_exit(rc, __func__);
@@ -168,7 +168,7 @@ void qemu_sem_post(QemuSemaphore *sem)
 {
     int rc;
 
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     pthread_mutex_lock(&sem->lock);
     if (sem->count == INT_MAX) {
         rc = EINVAL;
@@ -206,7 +206,7 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
     int rc;
     struct timespec ts;
 
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     compute_abs_deadline(&ts, ms);
     pthread_mutex_lock(&sem->lock);
     --sem->count;
@@ -248,7 +248,7 @@ int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
 
 void qemu_sem_wait(QemuSemaphore *sem)
 {
-#if defined(__OpenBSD__) || defined(__APPLE__) || defined(__NetBSD__)
+#if defined(__APPLE__) || defined(__NetBSD__)
     pthread_mutex_lock(&sem->lock);
     --sem->count;
     while (sem->count < 0) {
commit 62054c06d4d1d0d54ef87c2d9154efec00ad170c
Author: Alon Levy <alevy at redhat.com>
Date:   Sun Dec 23 23:25:09 2012 +0200

    usb/redirect.c: unbreak compilation due to include/char/char.h
    
    Broken since:
    
    commit 927d4878b0ff319ed87fed9363f314613b0a5ed9
    Author: Paolo Bonzini <pbonzini at redhat.com>
    Date:   Mon Dec 17 18:20:05 2012 +0100
    
        softmmu: move remaining include files to include/ subdirectories
    
    Signed-off-by: Alon Levy <alevy at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index b65e868..0abe1ff 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -30,6 +30,7 @@
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
 #include "qemu/iov.h"
+#include "char/char.h"
 
 #include <dirent.h>
 #include <sys/ioctl.h>
commit 4de63460ca1639bfb0e465ecdcf95551564bb3d6
Merge: a2685bc 501a7ce
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Fri Dec 28 16:08:23 2012 +0000

    Merge branch 'qom-cpu' of git://repo.or.cz/qemu/afaerber
    
    * 'qom-cpu' of git://repo.or.cz/qemu/afaerber:
      MAINTAINERS: Include X86CPU in CPU maintenance area
      cpu: Move kvm_run into CPUState
      cpu: Move kvm_state field into CPUState
      ppc_booke: Pass PowerPCCPU to ppc_booke_timers_init()
      ppc4xx_devs: Return PowerPCCPU from ppc4xx_init()
      ppc_booke: Pass PowerPCCPU to {decr,fit,wdt} timer callbacks
      ppc: Pass PowerPCCPU to [h]decr timer callbacks
      ppc: Pass PowerPCCPU to [h]decr callbacks
      ppc: Pass PowerPCCPU to ppc_set_irq()
      kvm: Pass CPUState to kvm_vcpu_ioctl()
      kvm: Pass CPUState to kvm_arch_*
      cpu: Move kvm_fd into CPUState
      qdev-properties.c: Separate core from the code used only by qemu-system-*
      qdev: Coding style fixes
      cpu: Introduce CPUListState struct
      target-alpha: Add support for -cpu ?
      target-alpha: Turn CPU definitions into subclasses
      target-alpha: Avoid leaking the alarm timer over reset
      alpha: Pass AlphaCPU array to Typhoon
      target-alpha: Let cpu_alpha_init() return AlphaCPU

commit 89d62be9f4fb538db7f919a2be7df2544ffc02c5
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Tue Dec 18 14:02:46 2012 +0200

    virtio-pci: don't poll masked vectors
    
    At the moment, when irqfd is in use but a vector is masked,
    qemu will poll it and handle vector masks in userspace.
    Since almost no one ever looks at the pending bits,
    it is better to defer this until pending bits
    are actually read.
    Implement this optimization using the new poll notifier.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 37e8b2d..af9a56c 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -508,8 +508,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
         }
         return ret;
     }
-
-    virtio_queue_set_guest_notifier_fd_handler(vq, true, true);
     return 0;
 }
 
@@ -528,8 +526,6 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
     if (--irqfd->users == 0) {
         kvm_irqchip_release_virq(kvm_state, irqfd->virq);
     }
-
-    virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
 }
 
 static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
@@ -580,7 +576,36 @@ static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
     }
 }
 
-static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign)
+static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
+                                       unsigned int vector_start,
+                                       unsigned int vector_end)
+{
+    VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+    VirtIODevice *vdev = proxy->vdev;
+    int queue_no;
+    unsigned int vector;
+    EventNotifier *notifier;
+    VirtQueue *vq;
+
+    for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            break;
+        }
+        vector = virtio_queue_vector(vdev, queue_no);
+        if (vector < vector_start || vector >= vector_end ||
+            !msix_is_masked(dev, vector)) {
+            continue;
+        }
+        vq = virtio_get_queue(vdev, queue_no);
+        notifier = virtio_queue_get_guest_notifier(vq);
+        if (event_notifier_test_and_clear(notifier)) {
+            msix_set_pending(dev, vector);
+        }
+    }
+}
+
+static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
+                                         bool with_irqfd)
 {
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
@@ -591,9 +616,9 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign)
         if (r < 0) {
             return r;
         }
-        virtio_queue_set_guest_notifier_fd_handler(vq, true, false);
+        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
     } else {
-        virtio_queue_set_guest_notifier_fd_handler(vq, false, false);
+        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
         event_notifier_cleanup(notifier);
     }
 
@@ -611,9 +636,11 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtIODevice *vdev = proxy->vdev;
     int r, n;
+    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
+        kvm_msi_via_irqfd_enabled();
 
     /* Must unset vector notifier while guest notifier is still assigned */
-    if (kvm_msi_via_irqfd_enabled() && !assign) {
+    if (proxy->vector_irqfd && !assign) {
         msix_unset_vector_notifiers(&proxy->pci_dev);
         g_free(proxy->vector_irqfd);
         proxy->vector_irqfd = NULL;
@@ -624,21 +651,22 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
             break;
         }
 
-        r = virtio_pci_set_guest_notifier(d, n, assign);
+        r = virtio_pci_set_guest_notifier(d, n, assign,
+                                          kvm_msi_via_irqfd_enabled());
         if (r < 0) {
             goto assign_error;
         }
     }
 
     /* Must set vector notifier after guest notifier has been assigned */
-    if (kvm_msi_via_irqfd_enabled() && assign) {
+    if (with_irqfd && assign) {
         proxy->vector_irqfd =
             g_malloc0(sizeof(*proxy->vector_irqfd) *
                       msix_nr_vectors_allocated(&proxy->pci_dev));
         r = msix_set_vector_notifiers(&proxy->pci_dev,
                                       kvm_virtio_pci_vector_use,
                                       kvm_virtio_pci_vector_release,
-                                      NULL);
+                                      kvm_virtio_pci_vector_poll);
         if (r < 0) {
             goto assign_error;
         }
@@ -650,7 +678,7 @@ assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
     assert(assign);
     while (--n >= 0) {
-        virtio_pci_set_guest_notifier(d, n, !assign);
+        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
     }
     return r;
 }
commit 70f8ee395afda6d96b15cb9a5b311af7720dded0
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Tue Dec 18 13:54:32 2012 +0200

    msix: expose access to masked/pending state
    
    For use by poll handler.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index a285d18..9eee657 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -65,7 +65,7 @@ static int msix_is_pending(PCIDevice *dev, int vector)
     return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
 }
 
-static void msix_set_pending(PCIDevice *dev, int vector)
+void msix_set_pending(PCIDevice *dev, unsigned int vector)
 {
     *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
 }
@@ -75,13 +75,13 @@ static void msix_clr_pending(PCIDevice *dev, int vector)
     *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
 }
 
-static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
+static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask)
 {
     unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
     return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
-static bool msix_is_masked(PCIDevice *dev, int vector)
+bool msix_is_masked(PCIDevice *dev, unsigned int vector)
 {
     return msix_vector_masked(dev, vector, dev->msix_function_masked);
 }
diff --git a/hw/pci/msix.h b/hw/pci/msix.h
index ea85d02..d0c4429 100644
--- a/hw/pci/msix.h
+++ b/hw/pci/msix.h
@@ -26,6 +26,9 @@ void msix_load(PCIDevice *dev, QEMUFile *f);
 int msix_enabled(PCIDevice *dev);
 int msix_present(PCIDevice *dev);
 
+bool msix_is_masked(PCIDevice *dev, unsigned vector);
+void msix_set_pending(PCIDevice *dev, unsigned vector);
+
 int msix_vector_use(PCIDevice *dev, unsigned vector);
 void msix_vector_unuse(PCIDevice *dev, unsigned vector);
 void msix_unuse_all_vectors(PCIDevice *dev);
commit bbef882cc1938fa5a6e1b36a50d79ce5c0cefb81
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Wed Dec 12 16:10:02 2012 +0200

    msi: add API to get notified about pending bit poll
    
    Update all users.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 073e22c..a285d18 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -191,6 +191,11 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
                                    unsigned size)
 {
     PCIDevice *dev = opaque;
+    if (dev->msix_vector_poll_notifier) {
+        unsigned vector_start = addr * 8;
+        unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr);
+        dev->msix_vector_poll_notifier(dev, vector_start, vector_end);
+    }
 
     return pci_get_long(dev->msix_pba + addr);
 }
@@ -513,7 +518,8 @@ static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
 
 int msix_set_vector_notifiers(PCIDevice *dev,
                               MSIVectorUseNotifier use_notifier,
-                              MSIVectorReleaseNotifier release_notifier)
+                              MSIVectorReleaseNotifier release_notifier,
+                              MSIVectorPollNotifier poll_notifier)
 {
     int vector, ret;
 
@@ -521,6 +527,7 @@ int msix_set_vector_notifiers(PCIDevice *dev,
 
     dev->msix_vector_use_notifier = use_notifier;
     dev->msix_vector_release_notifier = release_notifier;
+    dev->msix_vector_poll_notifier = poll_notifier;
 
     if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
@@ -531,6 +538,9 @@ int msix_set_vector_notifiers(PCIDevice *dev,
             }
         }
     }
+    if (dev->msix_vector_poll_notifier) {
+        dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr);
+    }
     return 0;
 
 undo:
@@ -557,4 +567,5 @@ void msix_unset_vector_notifiers(PCIDevice *dev)
     }
     dev->msix_vector_use_notifier = NULL;
     dev->msix_vector_release_notifier = NULL;
+    dev->msix_vector_poll_notifier = NULL;
 }
diff --git a/hw/pci/msix.h b/hw/pci/msix.h
index ff07ae2..ea85d02 100644
--- a/hw/pci/msix.h
+++ b/hw/pci/msix.h
@@ -36,6 +36,7 @@ void msix_reset(PCIDevice *dev);
 
 int msix_set_vector_notifiers(PCIDevice *dev,
                               MSIVectorUseNotifier use_notifier,
-                              MSIVectorReleaseNotifier release_notifier);
+                              MSIVectorReleaseNotifier release_notifier,
+                              MSIVectorPollNotifier poll_notifier);
 void msix_unset_vector_notifiers(PCIDevice *dev);
 #endif
diff --git a/hw/pci/pci.h b/hw/pci/pci.h
index 3152050..72927e3 100644
--- a/hw/pci/pci.h
+++ b/hw/pci/pci.h
@@ -187,6 +187,9 @@ typedef void (*PCIINTxRoutingNotifier)(PCIDevice *dev);
 typedef int (*MSIVectorUseNotifier)(PCIDevice *dev, unsigned int vector,
                                       MSIMessage msg);
 typedef void (*MSIVectorReleaseNotifier)(PCIDevice *dev, unsigned int vector);
+typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
+                                      unsigned int vector_start,
+                                      unsigned int vector_end);
 
 struct PCIDevice {
     DeviceState qdev;
@@ -271,6 +274,7 @@ struct PCIDevice {
     /* MSI-X notifiers */
     MSIVectorUseNotifier msix_vector_use_notifier;
     MSIVectorReleaseNotifier msix_vector_release_notifier;
+    MSIVectorPollNotifier msix_vector_poll_notifier;
 };
 
 void pci_register_bar(PCIDevice *pci_dev, int region_num,
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 41fb7ad..28c8303 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -698,7 +698,7 @@ static void vfio_enable_msix(VFIODevice *vdev)
     vdev->interrupt = VFIO_INT_MSIX;
 
     if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
-                                  vfio_msix_vector_release)) {
+                                  vfio_msix_vector_release, NULL)) {
         error_report("vfio: msix_set_vector_notifiers failed\n");
     }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 1f1a285..37e8b2d 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -637,7 +637,8 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
                       msix_nr_vectors_allocated(&proxy->pci_dev));
         r = msix_set_vector_notifiers(&proxy->pci_dev,
                                       kvm_virtio_pci_vector_use,
-                                      kvm_virtio_pci_vector_release);
+                                      kvm_virtio_pci_vector_release,
+                                      NULL);
         if (r < 0) {
             goto assign_error;
         }
commit 812d2594d558f7c4f95c99c8fc58adc47ab68eb3
Author: Knut Omang <knut.omang at oracle.com>
Date:   Tue Dec 18 22:36:29 2012 +0100

    pcie: Fix bug in pcie_ext_cap_set_next
    
    Upper 16 bits of the PCIe Extended Capability Header was truncated during update,
    also breaking pcie_add_capability.
    
    Signed-off-by: Knut Omang <knut.omang at oracle.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6c916d1..485c94c 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -494,7 +494,7 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id)
 
 static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t next)
 {
-    uint16_t header = pci_get_long(dev->config + pos);
+    uint32_t header = pci_get_long(dev->config + pos);
     assert(!(next & (PCI_EXT_CAP_ALIGN - 1)));
     header = (header & ~PCI_EXT_CAP_NEXT_MASK) |
         ((next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK);
commit 0a2a59d35cbabf63c91340a1c62038e3e60538c1
Author: Xudong Hao <xudong.hao at intel.com>
Date:   Thu Dec 20 11:07:23 2012 +0800

    qemu-kvm/pci-assign: 64 bits bar emulation
    
    Enable 64 bits bar emulation.
    
    Test pass with the current seabios which already support 64bit pci bars.
    
    Signed-off-by: Xudong Hao <xudong.hao at intel.com>
    Reviewed-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Gleb Natapov <gleb at redhat.com>

diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index e80dad0..addc205 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -46,6 +46,7 @@
 #define IORESOURCE_IRQ      0x00000400
 #define IORESOURCE_DMA      0x00000800
 #define IORESOURCE_PREFETCH 0x00002000  /* No side effects */
+#define IORESOURCE_MEM_64   0x00100000
 
 //#define DEVICE_ASSIGNMENT_DEBUG
 
@@ -442,9 +443,13 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
 
         /* handle memory io regions */
         if (cur_region->type & IORESOURCE_MEM) {
-            int t = cur_region->type & IORESOURCE_PREFETCH
-                ? PCI_BASE_ADDRESS_MEM_PREFETCH
-                : PCI_BASE_ADDRESS_SPACE_MEMORY;
+            int t = PCI_BASE_ADDRESS_SPACE_MEMORY;
+            if (cur_region->type & IORESOURCE_PREFETCH) {
+                t |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+            }
+            if (cur_region->type & IORESOURCE_MEM_64) {
+                t |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+            }
 
             /* map physical memory */
             pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size,
@@ -632,7 +637,8 @@ again:
         rp->valid = 0;
         rp->resource_fd = -1;
         size = end - start + 1;
-        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+        flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH
+                 | IORESOURCE_MEM_64;
         if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) {
             continue;
         }
commit a2685bcc80f61aa612e0d8cfd91086857ae2942e
Author: Gleb Natapov <gleb at redhat.com>
Date:   Wed Dec 19 17:46:15 2012 +0200

    MAINTAINERS: Take over kvm maintenance
    
    Replace Avi with myself as kvm maintainer.
    
    Signed-off-by: Gleb Natapov <gleb at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index c1b16c5..3e9dbc2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -132,7 +132,7 @@ Guest CPU Cores (KVM):
 ----------------------
 
 Overall
-M: Avi Kivity <avi at redhat.com>
+M: Gleb Natapov <gleb at redhat.com>
 M: Marcelo Tosatti <mtosatti at redhat.com>
 L: kvm at vger.kernel.org
 S: Supported
@@ -150,7 +150,7 @@ S: Maintained
 F: target-s390x/kvm.c
 
 X86
-M: Avi Kivity <avi at redhat.com>
+M: Gleb Natapov <gleb at redhat.com>
 M: Marcelo Tosatti <mtosatti at redhat.com>
 L: kvm at vger.kernel.org
 S: Supported
commit 74880fe27d2120ab3861dc857ecd025db1a67038
Author: Robert Schiele <rschiele at gmail.com>
Date:   Tue Dec 4 16:58:08 2012 +0100

    configure: allow disabling pixman if not needed
    
    When we build neither any system emulation targets nor the tools there
    is actually no need for pixman library.  In that case do not enforce
    presence of that library on the system.
    
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Robert Schiele <rschiele at gmail.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/configure b/configure
index 14f05c7..99c1ec3 100755
--- a/configure
+++ b/configure
@@ -656,6 +656,8 @@ for opt do
   ;;
   --without-system-pixman) pixman="internal"
   ;;
+  --without-pixman) pixman="none"
+  ;;
   --disable-sdl) sdl="no"
   ;;
   --enable-sdl) sdl="yes"
@@ -2130,13 +2132,25 @@ fi
 # pixman support probe
 
 if test "$pixman" = ""; then
-  if $pkg_config pixman-1 > /dev/null 2>&1; then
+  if test "$want_tools" = "no" -a "$softmmu" = "no"; then
+    pixman="none"
+  elif $pkg_config pixman-1 > /dev/null 2>&1; then
     pixman="system"
   else
     pixman="internal"
   fi
 fi
-if test "$pixman" = "system"; then
+if test "$pixman" = "none"; then
+  if test "$want_tools" != "no" -o "$softmmu" != "no"; then
+    echo "ERROR: pixman disabled but system emulation or tools build"
+    echo "       enabled.  You can turn off pixman only if you also"
+    echo "       disable all system emulation targets and the tools"
+    echo "       build with '--disable-tools --disable-system'."
+    exit 1
+  fi
+  pixman_cflags=
+  pixman_libs=
+elif test "$pixman" = "system"; then
   pixman_cflags=`$pkg_config --cflags pixman-1 2>/dev/null`
   pixman_libs=`$pkg_config --libs pixman-1 2>/dev/null`
 else
diff --git a/target-unicore32/helper.c b/target-unicore32/helper.c
index ff4f628..5359538 100644
--- a/target-unicore32/helper.c
+++ b/target-unicore32/helper.c
@@ -13,7 +13,9 @@
 #include "exec/gdbstub.h"
 #include "helper.h"
 #include "qemu/host-utils.h"
+#ifndef CONFIG_USER_ONLY
 #include "ui/console.h"
+#endif
 
 #undef DEBUG_UC32
 
commit 2915efbfa8efadaa2806e827ba92b8dba4f7cd52
Author: Alex Horn <alex.horn at cs.ox.ac.uk>
Date:   Wed Dec 5 12:34:06 2012 +0000

    tmp105: Create API for TMP105 temperature sensor.
    
    * Define enum for TMP105 registers
    * Move tmp105_set() from I2C to TMP105 header
    * Document units and range of temperature as preconditions
    
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alex Horn <alex.horn at cs.ox.ac.uk>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/i2c.h b/hw/i2c.h
index 0f5682b..883b5c5 100644
--- a/hw/i2c.h
+++ b/hw/i2c.h
@@ -73,9 +73,6 @@ void *wm8750_dac_buffer(void *opaque, int samples);
 void wm8750_dac_commit(void *opaque);
 void wm8750_set_bclk_in(void *opaque, int new_hz);
 
-/* tmp105.c */
-void tmp105_set(I2CSlave *i2c, int temp);
-
 /* lm832x.c */
 void lm832x_key_event(DeviceState *dev, int key, int state);
 
diff --git a/hw/tmp105.c b/hw/tmp105.c
index 8e8dbd9..9c67e64 100644
--- a/hw/tmp105.c
+++ b/hw/tmp105.c
@@ -20,6 +20,7 @@
 
 #include "hw.h"
 #include "i2c.h"
+#include "tmp105.h"
 
 typedef struct {
     I2CSlave i2c;
@@ -92,22 +93,22 @@ static void tmp105_read(TMP105State *s)
     }
 
     switch (s->pointer & 3) {
-    case 0:	/* Temperature */
+    case TMP105_REG_TEMPERATURE:
         s->buf[s->len ++] = (((uint16_t) s->temperature) >> 8);
         s->buf[s->len ++] = (((uint16_t) s->temperature) >> 0) &
                 (0xf0 << ((~s->config >> 5) & 3));		/* R */
         break;
 
-    case 1:	/* Configuration */
+    case TMP105_REG_CONFIG:
         s->buf[s->len ++] = s->config;
         break;
 
-    case 2:	/* T_LOW */
+    case TMP105_REG_T_LOW:
         s->buf[s->len ++] = ((uint16_t) s->limit[0]) >> 8;
         s->buf[s->len ++] = ((uint16_t) s->limit[0]) >> 0;
         break;
 
-    case 3:	/* T_HIGH */
+    case TMP105_REG_T_HIGH:
         s->buf[s->len ++] = ((uint16_t) s->limit[1]) >> 8;
         s->buf[s->len ++] = ((uint16_t) s->limit[1]) >> 0;
         break;
@@ -117,10 +118,10 @@ static void tmp105_read(TMP105State *s)
 static void tmp105_write(TMP105State *s)
 {
     switch (s->pointer & 3) {
-    case 0:	/* Temperature */
+    case TMP105_REG_TEMPERATURE:
         break;
 
-    case 1:	/* Configuration */
+    case TMP105_REG_CONFIG:
         if (s->buf[0] & ~s->config & (1 << 0))			/* SD */
             printf("%s: TMP105 shutdown\n", __FUNCTION__);
         s->config = s->buf[0];
@@ -128,8 +129,8 @@ static void tmp105_write(TMP105State *s)
         tmp105_alarm_update(s);
         break;
 
-    case 2:	/* T_LOW */
-    case 3:	/* T_HIGH */
+    case TMP105_REG_T_LOW:
+    case TMP105_REG_T_HIGH:
         if (s->len >= 3)
             s->limit[s->pointer & 1] = (int16_t)
                     ((((uint16_t) s->buf[0]) << 8) | s->buf[1]);
diff --git a/hw/tmp105.h b/hw/tmp105.h
new file mode 100644
index 0000000..51eff4b
--- /dev/null
+++ b/hw/tmp105.h
@@ -0,0 +1,67 @@
+/*
+ * Texas Instruments TMP105 Temperature Sensor
+ *
+ * Browse the data sheet:
+ *
+ *    http://www.ti.com/lit/gpn/tmp105
+ *
+ * Copyright (C) 2012 Alex Horn <alex.horn at cs.ox.ac.uk>
+ * Copyright (C) 2008-2012 Andrzej Zaborowski <balrogg at gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_TMP105_H
+#define QEMU_TMP105_H
+
+#include "i2c.h"
+
+/**
+ * TMP105Reg:
+ * @TMP105_REG_TEMPERATURE: Temperature register
+ * @TMP105_REG_CONFIG: Configuration register
+ * @TMP105_REG_T_LOW: Low temperature register (also known as T_hyst)
+ * @TMP105_REG_T_HIGH: High temperature register (also known as T_OS)
+ *
+ * The following temperature sensors are
+ * compatible with the TMP105 registers:
+ * - adt75
+ * - ds1775
+ * - ds75
+ * - lm75
+ * - lm75a
+ * - max6625
+ * - max6626
+ * - mcp980x
+ * - stds75
+ * - tcn75
+ * - tmp100
+ * - tmp101
+ * - tmp105
+ * - tmp175
+ * - tmp275
+ * - tmp75
+ **/
+typedef enum TMP105Reg {
+    TMP105_REG_TEMPERATURE = 0,
+    TMP105_REG_CONFIG,
+    TMP105_REG_T_LOW,
+    TMP105_REG_T_HIGH,
+} TMP105Reg;
+
+/**
+ * tmp105_set:
+ * @i2c: dispatcher to TMP105 hardware model
+ * @temp: temperature with 0.001 centigrades units in the range -40 C to +125 C
+ *
+ * Sets the temperature of the TMP105 hardware model.
+ *
+ * Bits 5 and 6 (value 32 and 64) in the register indexed by TMP105_REG_CONFIG
+ * determine the precision of the temperature. See Table 8 in the data sheet.
+ *
+ * @see_also: I2C_SLAVE macro
+ * @see_also: http://www.ti.com/lit/gpn/tmp105
+ */
+void tmp105_set(I2CSlave *i2c, int temp);
+
+#endif
commit eac236ea7bfc1902126be70459e320591078df5c
Author: Lluís Vilanova <vilanova at ac.upc.edu>
Date:   Fri Dec 14 20:13:09 2012 +0100

    build: Use separate makefile for "trace/"
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Lluís Vilanova <vilanova at ac.upc.edu>
    --
    Changes in v2:
    
    * Do not depend on "qemu-timer-common.o".
    * Use "$(obj)" in rules to refer to the build sub-directory.
    * Remove dependencies against "$(GENERATED_HEADERS)".
    
    Cc: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/.gitignore b/.gitignore
index 3a41765..0e38169 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,10 @@ config-all-devices.*
 config-all-disas.*
 config-host.*
 config-target.*
-trace.h
-trace.c
-trace-dtrace.h
-trace-dtrace.dtrace
+trace/generated-tracers.h
+trace/generated-tracers.c
+trace/generated-tracers-dtrace.h
+trace/generated-tracers-dtrace.dtrace
 *-timestamp
 *-softmmu
 *-darwin-user
diff --git a/Makefile b/Makefile
index a0321dd..a7ac04b 100644
--- a/Makefile
+++ b/Makefile
@@ -31,12 +31,15 @@ ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 endif
 endif
 
-GENERATED_HEADERS = config-host.h trace.h qemu-options.def
+GENERATED_HEADERS = config-host.h qemu-options.def
+GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
+GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c
+
+GENERATED_HEADERS += trace/generated-tracers.h
 ifeq ($(TRACE_BACKEND),dtrace)
-GENERATED_HEADERS += trace-dtrace.h
+GENERATED_HEADERS += trace/generated-tracers-dtrace.h
 endif
-GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h
-GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c trace.c
+GENERATED_SOURCES += trace/generated-tracers.c
 
 # Don't try to regenerate Makefile or configure
 # We don't generate any of them
@@ -233,9 +236,9 @@ clean:
 	rm -f *.a *.lo $(TOOLS) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
 	rm -Rf .libs
 	rm -f qemu-img-cmds.h
-	rm -f trace-dtrace.dtrace trace-dtrace.dtrace-timestamp
 	@# May not be present in GENERATED_HEADERS
-	rm -f trace-dtrace.h trace-dtrace.h-timestamp
+	rm -f trace/generated-tracers-dtrace.dtrace*
+	rm -f trace/generated-tracers-dtrace.h*
 	rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp)
 	rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp)
 	rm -rf qapi-generated
diff --git a/Makefile.objs b/Makefile.objs
index 4ef0a71..3a3a402 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -133,66 +133,7 @@ universal-obj-y += disas/
 ######################################################################
 # trace
 
-ifeq ($(TRACE_BACKEND),dtrace)
-TRACE_H_EXTRA_DEPS=trace-dtrace.h
-endif
-trace.h: trace.h-timestamp $(TRACE_H_EXTRA_DEPS)
-trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-	$(call quiet-command,$(TRACETOOL) \
-		--format=h \
-		--backend=$(TRACE_BACKEND) \
-		< $< > $@,"  GEN   trace.h")
-	@cmp -s $@ trace.h || cp $@ trace.h
-
-trace.c: trace.c-timestamp
-trace.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-	$(call quiet-command,$(TRACETOOL) \
-		--format=c \
-		--backend=$(TRACE_BACKEND) \
-		< $< > $@,"  GEN   trace.c")
-	@cmp -s $@ trace.c || cp $@ trace.c
-
-trace.o: trace.c $(GENERATED_HEADERS)
-
-trace-dtrace.h: trace-dtrace.dtrace
-	$(call quiet-command,dtrace -o $@ -h -s $<, "  GEN   trace-dtrace.h")
-
-# Normal practice is to name DTrace probe file with a '.d' extension
-# but that gets picked up by QEMU's Makefile as an external dependency
-# rule file. So we use '.dtrace' instead
-trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp
-trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-	$(call quiet-command,$(TRACETOOL) \
-		--format=d \
-		--backend=$(TRACE_BACKEND) \
-		< $< > $@,"  GEN   trace-dtrace.dtrace")
-	@cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace
-
-trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS)
-	$(call quiet-command,dtrace -o $@ -G -s $<, "  GEN   trace-dtrace.o")
-
-ifeq ($(LIBTOOL),)
-trace-dtrace.lo: trace-dtrace.dtrace
-	@echo "missing libtool. please install and rerun configure."; exit 1
-else
-trace-dtrace.lo: trace-dtrace.dtrace
-	$(call quiet-command,$(LIBTOOL) --mode=compile --tag=CC dtrace -o $@ -G -s $<, "  lt GEN trace-dtrace.o")
-endif
-
-trace/simple.o: trace/simple.c $(GENERATED_HEADERS)
-
-trace-obj-$(CONFIG_TRACE_DTRACE) += trace-dtrace.o
-ifneq ($(TRACE_BACKEND),dtrace)
-trace-obj-y = trace.o
-endif
-
-trace-obj-$(CONFIG_TRACE_DEFAULT) += trace/default.o
-trace-obj-$(CONFIG_TRACE_SIMPLE) += trace/simple.o
-trace-obj-$(CONFIG_TRACE_SIMPLE) += qemu-timer-common.o
-trace-obj-$(CONFIG_TRACE_STDERR) += trace/stderr.o
-trace-obj-y += trace/control.o
-
-$(trace-obj-y): $(GENERATED_HEADERS)
+trace-obj-y += trace/
 
 universal-obj-y += $(trace-obj-y)
 
@@ -239,5 +180,6 @@ nested-vars += \
 	user-obj-y \
 	common-obj-y \
 	universal-obj-y \
-	extra-obj-y
+	extra-obj-y \
+	trace-obj-y
 dummy := $(call unnest-vars)
diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py
index 23c43e2..ad5eb3b 100644
--- a/scripts/tracetool/backend/dtrace.py
+++ b/scripts/tracetool/backend/dtrace.py
@@ -37,7 +37,7 @@ def c(events):
 
 
 def h(events):
-    out('#include "trace-dtrace.h"',
+    out('#include "trace/generated-tracers-dtrace.h"',
         '')
 
     for e in events:
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
index 6ffb3c2..9a58de1 100644
--- a/scripts/tracetool/format/h.py
+++ b/scripts/tracetool/format/h.py
@@ -19,8 +19,8 @@ from tracetool import out
 def begin(events):
     out('/* This file is autogenerated by tracetool, do not edit. */',
         '',
-        '#ifndef TRACE_H',
-        '#define TRACE_H',
+        '#ifndef TRACE__GENERATED_TRACERS_H',
+        '#define TRACE__GENERATED_TRACERS_H',
         '',
         '#include "qemu-common.h"')
 
@@ -32,7 +32,7 @@ def end(events):
             enabled = 1
         out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled))
     out('',
-        '#endif /* TRACE_H */')
+        '#endif /* TRACE__GENERATED_TRACERS_H */')
 
 def nop(events):
     for e in events:
diff --git a/trace.h b/trace.h
new file mode 100644
index 0000000..c15f498
--- /dev/null
+++ b/trace.h
@@ -0,0 +1,6 @@
+#ifndef TRACE_H
+#define TRACE_H
+
+#include "trace/generated-tracers.h"
+
+#endif  /* TRACE_H */
diff --git a/trace/Makefile.objs b/trace/Makefile.objs
new file mode 100644
index 0000000..b791723
--- /dev/null
+++ b/trace/Makefile.objs
@@ -0,0 +1,70 @@
+# -*- mode: makefile -*-
+
+######################################################################
+# Auto-generated tracing routines
+
+ifeq ($(TRACE_BACKEND),dtrace)
+TRACE_H_EXTRA_DEPS=$(obj)/generated-tracers-dtrace.h
+endif
+$(obj)/generated-tracers.h: $(obj)/generated-tracers.h-timestamp $(TRACE_H_EXTRA_DEPS)
+$(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+	$(call quiet-command,$(TRACETOOL) \
+		--format=h \
+		--backend=$(TRACE_BACKEND) \
+		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+	@cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-tracers.c: $(obj)/generated-tracers.c-timestamp
+$(obj)/generated-tracers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+	$(call quiet-command,$(TRACETOOL) \
+		--format=c \
+		--backend=$(TRACE_BACKEND) \
+		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+	@cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.h
+
+ifneq ($(TRACE_BACKEND),dtrace)
+trace-obj-y += generated-tracers.o
+endif
+
+
+######################################################################
+# Auto-generated DTrace code
+
+# Normal practice is to name DTrace probe file with a '.d' extension
+# but that gets picked up by QEMU's Makefile as an external dependency
+# rule file. So we use '.dtrace' instead
+$(obj)/generated-tracers-dtrace.dtrace: $(obj)/generated-tracers-dtrace.dtrace-timestamp
+$(obj)/generated-tracers-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
+	$(call quiet-command,$(TRACETOOL) \
+		--format=d \
+		--backend=$(TRACE_BACKEND) \
+		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+	@cmp -s $@ $(patsubst %-timestamp,%,$@) || cp $@ $(patsubst %-timestamp,%,$@)
+
+$(obj)/generated-tracers-dtrace.h: trace/generated-tracers-dtrace.dtrace
+	$(call quiet-command,dtrace -o $@ -h -s $<, "  GEN   $@")
+
+$(obj)/generated-tracers-dtrace.o: trace/generated-tracers-dtrace.dtrace
+	$(call quiet-command,dtrace -o $@ -G -s $<, "  GEN   $@")
+
+trace-obj-$(CONFIG_TRACE_DTRACE) += generated-tracers-dtrace.o
+
+
+ifeq ($(LIBTOOL),)
+$(obj)/generated-tracers-dtrace.lo: $(obj)/generated-tracers-dtrace.dtrace
+	@echo "missing libtool. please install and rerun configure."; exit 1
+else
+$(obj)/generated-tracers-dtrace.lo: $(obj)/generated-tracers-dtrace.dtrace
+	$(call quiet-command,$(LIBTOOL) --mode=compile --tag=CC dtrace -o $@ -G -s $<, "  lt GEN $@")
+endif
+
+
+######################################################################
+# Backend code
+
+trace-obj-$(CONFIG_TRACE_DEFAULT) += default.o
+trace-obj-$(CONFIG_TRACE_SIMPLE) += simple.o
+trace-obj-$(CONFIG_TRACE_STDERR) += stderr.o
+trace-obj-y += control.o
commit 6265e4ff327763b6362cba472e2b46f2dcf18762
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Fri Nov 23 12:12:01 2012 +0100

    win32: Switch thread abstraction to use TLS variable internally
    
    We already depend on working __thread support for coroutines, so this
    complication here is no longer needed.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
index 8037b39..517878d 100644
--- a/qemu-thread-win32.c
+++ b/qemu-thread-win32.c
@@ -239,7 +239,7 @@ struct QemuThreadData {
     CRITICAL_SECTION  cs;
 };
 
-static int qemu_thread_tls_index = TLS_OUT_OF_INDEXES;
+static __thread QemuThreadData *qemu_thread_data;
 
 static unsigned __stdcall win32_start_routine(void *arg)
 {
@@ -251,14 +251,15 @@ static unsigned __stdcall win32_start_routine(void *arg)
         g_free(data);
         data = NULL;
     }
-    TlsSetValue(qemu_thread_tls_index, data);
+    qemu_thread_data = data;
     qemu_thread_exit(start_routine(thread_arg));
     abort();
 }
 
 void qemu_thread_exit(void *arg)
 {
-    QemuThreadData *data = TlsGetValue(qemu_thread_tls_index);
+    QemuThreadData *data = qemu_thread_data;
+
     if (data) {
         assert(data->mode != QEMU_THREAD_DETACHED);
         data->ret = arg;
@@ -298,25 +299,13 @@ void *qemu_thread_join(QemuThread *thread)
     return ret;
 }
 
-static inline void qemu_thread_init(void)
-{
-    if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
-        qemu_thread_tls_index = TlsAlloc();
-        if (qemu_thread_tls_index == TLS_OUT_OF_INDEXES) {
-            error_exit(ERROR_NO_SYSTEM_RESOURCES, __func__);
-        }
-    }
-}
-
-
 void qemu_thread_create(QemuThread *thread,
                        void *(*start_routine)(void *),
                        void *arg, int mode)
 {
     HANDLE hThread;
-
     struct QemuThreadData *data;
-    qemu_thread_init();
+
     data = g_malloc(sizeof *data);
     data->start_routine = start_routine;
     data->arg = arg;
@@ -338,8 +327,7 @@ void qemu_thread_create(QemuThread *thread,
 
 void qemu_thread_get_self(QemuThread *thread)
 {
-    qemu_thread_init();
-    thread->data = TlsGetValue(qemu_thread_tls_index);
+    thread->data = qemu_thread_data;
     thread->tid = GetCurrentThreadId();
 }
 
commit 57f26ae72983095d0258e391041dfb8864f769e5
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Dec 20 16:43:48 2012 -0200

    target-i386: CPUID: return highest basic leaf if eax > cpuid_xlevel
    
    This fixes a subtle bug. A bug that probably won't cause trouble for any
    existing OS, but a bug anyway:
    
    Intel SDM Volume 2, CPUID Instruction states:
    
    > Two types of information are returned: basic and extended function
    > information. If a value entered for CPUID.EAX is higher than the maximum
    > input value for basic or extended function for that processor then the
    > data for the highest basic information leaf is returned. For example,
    > using the Intel Core i7 processor, the following is true:
    >
    >   CPUID.EAX = 05H (* Returns MONITOR/MWAIT leaf. *)
    >   CPUID.EAX = 0AH (* Returns Architectural Performance Monitoring leaf. *)
    >   CPUID.EAX = 0BH (* Returns Extended Topology Enumeration leaf. *)
    >   CPUID.EAX = 0CH (* INVALID: Returns the same information as CPUID.EAX = 0BH. *)
    >   CPUID.EAX = 80000008H (* Returns linear/physical address size data. *)
    >   CPUID.EAX = 8000000AH (* INVALID: Returns same information as CPUID.EAX = 0BH. *)
    
    AMD's CPUID Specification, on the other hand, is less specific:
    
    > The CPUID instruction supports two sets or ranges of functions,
    > standard and extended.
    >
    > • The smallest function number of the standard function range is
    >   Fn0000_0000. The largest function number of the standard function
    >   range, for a particular implementation, is returned in CPUID
    >   Fn0000_0000_EAX.
    >
    > • The smallest function number of the extended function range is
    >   Fn8000_0000. The largest function number of the extended function
    >   range, for a particular implementation, is returned in CPUID
    >   Fn8000_0000_EAX.
    >
    > Functions that are neither standard nor extended are undefined and
    > should not be relied upon.
    
    QEMU's behavior matched Intel's specification before, but this was
    changed by commit b3baa152aaef1905876670590275c2dd0bbb088c. This patch
    restores the behavior documented by Intel when cpuid_xlevel2 is 0.
    
    The existing behavior when cpuid_xlevel2 is set (falling back to
    level=cpuid_xlevel) is being kept, as I couldn't find any public
    documentation on the CPUID 0xC0000000 function range on Centaur CPUs.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 1837f5a..3cd1cee 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1648,7 +1648,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                     index = env->cpuid_xlevel;
                 }
             } else {
-                index =  env->cpuid_xlevel;
+                /* Intel documentation states that invalid EAX input will
+                 * return the same information as EAX=cpuid_level
+                 * (Intel SDM Vol. 2A - Instruction Set Reference - CPUID)
+                 */
+                index =  env->cpuid_level;
             }
         }
     } else {
commit 586502189edf9fd0f89a83de96717a2ea826fdb0
Author: Lei Li <lilei at linux.vnet.ibm.com>
Date:   Fri Dec 21 12:26:38 2012 +0800

    qemu-char: Inherit ptys and improve output from -serial pty
    
    Changes since V1:
      - Avoid crashing since qemu_opts_id() may return null on some
        systems according to Markus's suggestion.
    
    When controlling a qemu instance from another program, it's
    hard to know which serial port or monitor device is redirected
    to which pty. With more than one device using "pty" a lot of
    guesswork is involved.
    
    $ ./x86_64-softmmu/qemu-system-x86_64 -serial pty -serial pty -monitor pty
    char device redirected to /dev/pts/5
    char device redirected to /dev/pts/6
    char device redirected to /dev/pts/7
    
    Although we can find out what everything else is connected to
    by the "info chardev" with "-monitor stdio" in the command line,
    It'd be very useful to be able to have qemu inherit pseudo-tty
    file descriptors so they could just be specified on the command
    line like:
    
    $ ./x86_64-softmmu/qemu-system-x86_64 -serial pty -serial pty -monitor pty
    char device compat_monitor0 redirected to /dev/pts/5
    char device serial0 redirected to /dev/pts/6
    char device serial1 redirected to /dev/pts/7
    
    Referred link: https://bugs.launchpad.net/qemu/+bug/938552
    
    Signed-off-by: Lei Li <lilei at linux.vnet.ibm.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qemu-char.c b/qemu-char.c
index 6113d0a..c6382a9 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -980,6 +980,7 @@ static CharDriverState *qemu_chr_open_pty(QemuOpts *opts)
     CharDriverState *chr;
     PtyCharDriver *s;
     struct termios tty;
+    const char *label;
     int master_fd, slave_fd, len;
 #if defined(__OpenBSD__) || defined(__DragonFly__)
     char pty_name[PATH_MAX];
@@ -1005,7 +1006,12 @@ static CharDriverState *qemu_chr_open_pty(QemuOpts *opts)
     chr->filename = g_malloc(len);
     snprintf(chr->filename, len, "pty:%s", q_ptsname(master_fd));
     qemu_opt_set(opts, "path", q_ptsname(master_fd));
-    fprintf(stderr, "char device redirected to %s\n", q_ptsname(master_fd));
+
+    label = qemu_opts_id(opts);
+    fprintf(stderr, "char device%s%s redirected to %s\n",
+            label ? " " : "",
+            label ?: "",
+            q_ptsname(master_fd));
 
     s = g_malloc0(sizeof(PtyCharDriver));
     chr->opaque = s;
commit 501a7ce7270955be151c442c27620fa7af2f3ce5
Merge: 62e0c09 36f25d2
Author: Andreas Färber <afaerber at suse.de>
Date:   Sun Dec 23 00:39:34 2012 +0100

    Merge branch 'master' of git://git.qemu.org/qemu into qom-cpu
    
    Adapt header include paths.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --cc hw/ppc.h
index ec33f9c,17005c7..e73ae83
--- a/hw/ppc.h
+++ b/hw/ppc.h
@@@ -1,4 -1,7 +1,7 @@@
+ #ifndef HW_PPC_H
+ #define HW_PPC_H 1
+ 
 -void ppc_set_irq (CPUPPCState *env, int n_IRQ, int level);
 +void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level);
  
  /* PowerPC hardware exceptions management helpers */
  typedef void (*clk_setup_cb)(void *opaque, uint32_t freq);
@@@ -89,4 -92,6 +92,6 @@@ enum 
  #define PPC_SERIAL_MM_BAUDBASE 399193
  
  /* ppc_booke.c */
 -void ppc_booke_timers_init(CPUPPCState *env, uint32_t freq, uint32_t flags);
 +void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags);
+ 
+ #endif
diff --cc hw/ppc4xx.h
index 5d891ae,2b96d47..59dba9e
--- a/hw/ppc4xx.h
+++ b/hw/ppc4xx.h
@@@ -25,12 -25,12 +25,12 @@@
  #if !defined(PPC_4XX_H)
  #define PPC_4XX_H
  
- #include "pci.h"
+ #include "pci/pci.h"
  
  /* PowerPC 4xx core initialization */
 -CPUPPCState *ppc4xx_init (const char *cpu_model,
 -                       clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
 -                       uint32_t sysclk);
 +PowerPCCPU *ppc4xx_init(const char *cpu_model,
 +                        clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
 +                        uint32_t sysclk);
  
  /* PowerPC 4xx universal interrupt controller */
  enum {
diff --cc hw/qdev-properties-system.c
index 86b4cf6,0000000..c73c713
mode 100644,000000..100644
--- a/hw/qdev-properties-system.c
+++ b/hw/qdev-properties-system.c
@@@ -1,357 -1,0 +1,358 @@@
 +/*
 + * qdev property parsing and global properties
 + * (parts specific for qemu-system-*)
 + *
 + * This file is based on code from hw/qdev-properties.c from
 + * commit 074a86fccd185616469dfcdc0e157f438aebba18,
 + * Copyright (c) Gerd Hoffmann <kraxel at redhat.com> and other contributors.
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + */
 +
- #include "net.h"
++#include "net/net.h"
 +#include "qdev.h"
- #include "qerror.h"
- #include "blockdev.h"
++#include "qapi/qmp/qerror.h"
++#include "sysemu/blockdev.h"
 +#include "hw/block-common.h"
 +#include "net/hub.h"
- #include "qapi/qapi-visit-core.h"
++#include "qapi/visitor.h"
++#include "char/char.h"
 +
 +static void get_pointer(Object *obj, Visitor *v, Property *prop,
 +                        const char *(*print)(void *ptr),
 +                        const char *name, Error **errp)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    void **ptr = qdev_get_prop_ptr(dev, prop);
 +    char *p;
 +
 +    p = (char *) (*ptr ? print(*ptr) : "");
 +    visit_type_str(v, &p, name, errp);
 +}
 +
 +static void set_pointer(Object *obj, Visitor *v, Property *prop,
 +                        int (*parse)(DeviceState *dev, const char *str,
 +                                     void **ptr),
 +                        const char *name, Error **errp)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    Error *local_err = NULL;
 +    void **ptr = qdev_get_prop_ptr(dev, prop);
 +    char *str;
 +    int ret;
 +
 +    if (dev->state != DEV_STATE_CREATED) {
 +        error_set(errp, QERR_PERMISSION_DENIED);
 +        return;
 +    }
 +
 +    visit_type_str(v, &str, name, &local_err);
 +    if (local_err) {
 +        error_propagate(errp, local_err);
 +        return;
 +    }
 +    if (!*str) {
 +        g_free(str);
 +        *ptr = NULL;
 +        return;
 +    }
 +    ret = parse(dev, str, ptr);
 +    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
 +    g_free(str);
 +}
 +
 +/* --- drive --- */
 +
 +static int parse_drive(DeviceState *dev, const char *str, void **ptr)
 +{
 +    BlockDriverState *bs;
 +
 +    bs = bdrv_find(str);
 +    if (bs == NULL) {
 +        return -ENOENT;
 +    }
 +    if (bdrv_attach_dev(bs, dev) < 0) {
 +        return -EEXIST;
 +    }
 +    *ptr = bs;
 +    return 0;
 +}
 +
 +static void release_drive(Object *obj, const char *name, void *opaque)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    Property *prop = opaque;
 +    BlockDriverState **ptr = qdev_get_prop_ptr(dev, prop);
 +
 +    if (*ptr) {
 +        bdrv_detach_dev(*ptr, dev);
 +        blockdev_auto_del(*ptr);
 +    }
 +}
 +
 +static const char *print_drive(void *ptr)
 +{
 +    return bdrv_get_device_name(ptr);
 +}
 +
 +static void get_drive(Object *obj, Visitor *v, void *opaque,
 +                      const char *name, Error **errp)
 +{
 +    get_pointer(obj, v, opaque, print_drive, name, errp);
 +}
 +
 +static void set_drive(Object *obj, Visitor *v, void *opaque,
 +                      const char *name, Error **errp)
 +{
 +    set_pointer(obj, v, opaque, parse_drive, name, errp);
 +}
 +
 +PropertyInfo qdev_prop_drive = {
 +    .name  = "drive",
 +    .get   = get_drive,
 +    .set   = set_drive,
 +    .release = release_drive,
 +};
 +
 +/* --- character device --- */
 +
 +static int parse_chr(DeviceState *dev, const char *str, void **ptr)
 +{
 +    CharDriverState *chr = qemu_chr_find(str);
 +    if (chr == NULL) {
 +        return -ENOENT;
 +    }
 +    if (chr->avail_connections < 1) {
 +        return -EEXIST;
 +    }
 +    *ptr = chr;
 +    --chr->avail_connections;
 +    return 0;
 +}
 +
 +static void release_chr(Object *obj, const char *name, void *opaque)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    Property *prop = opaque;
 +    CharDriverState **ptr = qdev_get_prop_ptr(dev, prop);
 +
 +    if (*ptr) {
 +        qemu_chr_add_handlers(*ptr, NULL, NULL, NULL, NULL);
 +    }
 +}
 +
 +
 +static const char *print_chr(void *ptr)
 +{
 +    CharDriverState *chr = ptr;
 +
 +    return chr->label ? chr->label : "";
 +}
 +
 +static void get_chr(Object *obj, Visitor *v, void *opaque,
 +                    const char *name, Error **errp)
 +{
 +    get_pointer(obj, v, opaque, print_chr, name, errp);
 +}
 +
 +static void set_chr(Object *obj, Visitor *v, void *opaque,
 +                    const char *name, Error **errp)
 +{
 +    set_pointer(obj, v, opaque, parse_chr, name, errp);
 +}
 +
 +PropertyInfo qdev_prop_chr = {
 +    .name  = "chr",
 +    .get   = get_chr,
 +    .set   = set_chr,
 +    .release = release_chr,
 +};
 +
 +/* --- netdev device --- */
 +
 +static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
 +{
 +    NetClientState *netdev = qemu_find_netdev(str);
 +
 +    if (netdev == NULL) {
 +        return -ENOENT;
 +    }
 +    if (netdev->peer) {
 +        return -EEXIST;
 +    }
 +    *ptr = netdev;
 +    return 0;
 +}
 +
 +static const char *print_netdev(void *ptr)
 +{
 +    NetClientState *netdev = ptr;
 +
 +    return netdev->name ? netdev->name : "";
 +}
 +
 +static void get_netdev(Object *obj, Visitor *v, void *opaque,
 +                       const char *name, Error **errp)
 +{
 +    get_pointer(obj, v, opaque, print_netdev, name, errp);
 +}
 +
 +static void set_netdev(Object *obj, Visitor *v, void *opaque,
 +                       const char *name, Error **errp)
 +{
 +    set_pointer(obj, v, opaque, parse_netdev, name, errp);
 +}
 +
 +PropertyInfo qdev_prop_netdev = {
 +    .name  = "netdev",
 +    .get   = get_netdev,
 +    .set   = set_netdev,
 +};
 +
 +/* --- vlan --- */
 +
 +static int print_vlan(DeviceState *dev, Property *prop, char *dest, size_t len)
 +{
 +    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
 +
 +    if (*ptr) {
 +        int id;
 +        if (!net_hub_id_for_client(*ptr, &id)) {
 +            return snprintf(dest, len, "%d", id);
 +        }
 +    }
 +
 +    return snprintf(dest, len, "<null>");
 +}
 +
 +static void get_vlan(Object *obj, Visitor *v, void *opaque,
 +                     const char *name, Error **errp)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    Property *prop = opaque;
 +    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
 +    int32_t id = -1;
 +
 +    if (*ptr) {
 +        int hub_id;
 +        if (!net_hub_id_for_client(*ptr, &hub_id)) {
 +            id = hub_id;
 +        }
 +    }
 +
 +    visit_type_int32(v, &id, name, errp);
 +}
 +
 +static void set_vlan(Object *obj, Visitor *v, void *opaque,
 +                     const char *name, Error **errp)
 +{
 +    DeviceState *dev = DEVICE(obj);
 +    Property *prop = opaque;
 +    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
 +    Error *local_err = NULL;
 +    int32_t id;
 +    NetClientState *hubport;
 +
 +    if (dev->state != DEV_STATE_CREATED) {
 +        error_set(errp, QERR_PERMISSION_DENIED);
 +        return;
 +    }
 +
 +    visit_type_int32(v, &id, name, &local_err);
 +    if (local_err) {
 +        error_propagate(errp, local_err);
 +        return;
 +    }
 +    if (id == -1) {
 +        *ptr = NULL;
 +        return;
 +    }
 +
 +    hubport = net_hub_port_find(id);
 +    if (!hubport) {
 +        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
 +                  name, prop->info->name);
 +        return;
 +    }
 +    *ptr = hubport;
 +}
 +
 +PropertyInfo qdev_prop_vlan = {
 +    .name  = "vlan",
 +    .print = print_vlan,
 +    .get   = get_vlan,
 +    .set   = set_vlan,
 +};
 +
 +int qdev_prop_set_drive(DeviceState *dev, const char *name,
 +                        BlockDriverState *value)
 +{
 +    Error *errp = NULL;
 +    const char *bdrv_name = value ? bdrv_get_device_name(value) : "";
 +    object_property_set_str(OBJECT(dev), bdrv_name,
 +                            name, &errp);
 +    if (errp) {
 +        qerror_report_err(errp);
 +        error_free(errp);
 +        return -1;
 +    }
 +    return 0;
 +}
 +
 +void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name,
 +                                BlockDriverState *value)
 +{
 +    if (qdev_prop_set_drive(dev, name, value) < 0) {
 +        exit(1);
 +    }
 +}
 +void qdev_prop_set_chr(DeviceState *dev, const char *name,
 +                       CharDriverState *value)
 +{
 +    Error *errp = NULL;
 +    assert(!value || value->label);
 +    object_property_set_str(OBJECT(dev),
 +                            value ? value->label : "", name, &errp);
 +    assert_no_error(errp);
 +}
 +
 +void qdev_prop_set_netdev(DeviceState *dev, const char *name,
 +                          NetClientState *value)
 +{
 +    Error *errp = NULL;
 +    assert(!value || value->name);
 +    object_property_set_str(OBJECT(dev),
 +                            value ? value->name : "", name, &errp);
 +    assert_no_error(errp);
 +}
 +
 +void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
 +{
 +    qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
 +    if (nd->netdev) {
 +        qdev_prop_set_netdev(dev, "netdev", nd->netdev);
 +    }
 +    if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
 +        object_property_find(OBJECT(dev), "vectors", NULL)) {
 +        qdev_prop_set_uint32(dev, "vectors", nd->nvectors);
 +    }
 +    nd->instantiated = 1;
 +}
 +
 +static int qdev_add_one_global(QemuOpts *opts, void *opaque)
 +{
 +    GlobalProperty *g;
 +
 +    g = g_malloc0(sizeof(*g));
 +    g->driver   = qemu_opt_get(opts, "driver");
 +    g->property = qemu_opt_get(opts, "property");
 +    g->value    = qemu_opt_get(opts, "value");
 +    qdev_prop_register_global(g);
 +    return 0;
 +}
 +
 +void qemu_add_globals(void)
 +{
 +    qemu_opts_foreach(qemu_find_opts("global"), qdev_add_one_global, NULL, 0);
 +}
diff --cc hw/qdev.c
index fa0af21,c4a9857..f2c2484
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@@ -25,10 -25,11 +25,10 @@@
     inherit from a particular bus (e.g. PCI or I2C) rather than
     this API directly.  */
  
 -#include "net/net.h"
  #include "qdev.h"
- #include "sysemu.h"
- #include "error.h"
- #include "qapi/qapi-visit-core.h"
+ #include "sysemu/sysemu.h"
+ #include "qapi/error.h"
+ #include "qapi/visitor.h"
  
  int qdev_hotplug = 0;
  static bool qdev_hot_added = false;
diff --cc include/exec/cpu-common.h
index 0000000,4d4f8d4..2e5f11f
mode 000000,100644..100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@@ -1,0 -1,112 +1,124 @@@
+ #ifndef CPU_COMMON_H
+ #define CPU_COMMON_H 1
+ 
+ /* CPU interfaces that are target independent.  */
+ 
+ #include "exec/hwaddr.h"
+ 
+ #ifndef NEED_CPU_H
+ #include "exec/poison.h"
+ #endif
+ 
+ #include "qemu/bswap.h"
+ #include "qemu/queue.h"
+ 
++/**
++ * CPUListState:
++ * @cpu_fprintf: Print function.
++ * @file: File to print to using @cpu_fprintf.
++ *
++ * State commonly used for iterating over CPU models.
++ */
++typedef struct CPUListState {
++    fprintf_function cpu_fprintf;
++    FILE *file;
++} CPUListState;
++
+ #if !defined(CONFIG_USER_ONLY)
+ 
+ enum device_endian {
+     DEVICE_NATIVE_ENDIAN,
+     DEVICE_BIG_ENDIAN,
+     DEVICE_LITTLE_ENDIAN,
+ };
+ 
+ /* address in the RAM (different from a physical address) */
+ #if defined(CONFIG_XEN_BACKEND)
+ typedef uint64_t ram_addr_t;
+ #  define RAM_ADDR_MAX UINT64_MAX
+ #  define RAM_ADDR_FMT "%" PRIx64
+ #else
+ typedef uintptr_t ram_addr_t;
+ #  define RAM_ADDR_MAX UINTPTR_MAX
+ #  define RAM_ADDR_FMT "%" PRIxPTR
+ #endif
+ 
+ /* memory API */
+ 
+ typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
+ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
+ 
+ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+ /* This should only be used for ram local to a device.  */
+ void *qemu_get_ram_ptr(ram_addr_t addr);
+ void qemu_put_ram_ptr(void *addr);
+ /* This should not be used by devices.  */
+ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+ ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
+ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
+ 
+ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
+                             int len, int is_write);
+ static inline void cpu_physical_memory_read(hwaddr addr,
+                                             void *buf, int len)
+ {
+     cpu_physical_memory_rw(addr, buf, len, 0);
+ }
+ static inline void cpu_physical_memory_write(hwaddr addr,
+                                              const void *buf, int len)
+ {
+     cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+ }
+ void *cpu_physical_memory_map(hwaddr addr,
+                               hwaddr *plen,
+                               int is_write);
+ void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+                                int is_write, hwaddr access_len);
+ void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
+ 
+ bool cpu_physical_memory_is_io(hwaddr phys_addr);
+ 
+ /* Coalesced MMIO regions are areas where write operations can be reordered.
+  * This usually implies that write operations are side-effect free.  This allows
+  * batching which can make a major impact on performance when using
+  * virtualization.
+  */
+ void qemu_flush_coalesced_mmio_buffer(void);
+ 
+ uint32_t ldub_phys(hwaddr addr);
+ uint32_t lduw_le_phys(hwaddr addr);
+ uint32_t lduw_be_phys(hwaddr addr);
+ uint32_t ldl_le_phys(hwaddr addr);
+ uint32_t ldl_be_phys(hwaddr addr);
+ uint64_t ldq_le_phys(hwaddr addr);
+ uint64_t ldq_be_phys(hwaddr addr);
+ void stb_phys(hwaddr addr, uint32_t val);
+ void stw_le_phys(hwaddr addr, uint32_t val);
+ void stw_be_phys(hwaddr addr, uint32_t val);
+ void stl_le_phys(hwaddr addr, uint32_t val);
+ void stl_be_phys(hwaddr addr, uint32_t val);
+ void stq_le_phys(hwaddr addr, uint64_t val);
+ void stq_be_phys(hwaddr addr, uint64_t val);
+ 
+ #ifdef NEED_CPU_H
+ uint32_t lduw_phys(hwaddr addr);
+ uint32_t ldl_phys(hwaddr addr);
+ uint64_t ldq_phys(hwaddr addr);
+ void stl_phys_notdirty(hwaddr addr, uint32_t val);
+ void stq_phys_notdirty(hwaddr addr, uint64_t val);
+ void stw_phys(hwaddr addr, uint32_t val);
+ void stl_phys(hwaddr addr, uint32_t val);
+ void stq_phys(hwaddr addr, uint64_t val);
+ #endif
+ 
+ void cpu_physical_memory_write_rom(hwaddr addr,
+                                    const uint8_t *buf, int len);
+ 
+ extern struct MemoryRegion io_mem_ram;
+ extern struct MemoryRegion io_mem_rom;
+ extern struct MemoryRegion io_mem_unassigned;
+ extern struct MemoryRegion io_mem_notdirty;
+ 
+ #endif
+ 
+ #endif /* !CPU_COMMON_H */
diff --cc include/exec/cpu-defs.h
index 0000000,aea0ece..b22b4c6
mode 000000,100644..100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@@ -1,0 -1,213 +1,207 @@@
+ /*
+  * common defines for all CPUs
+  *
+  * Copyright (c) 2003 Fabrice Bellard
+  *
+  * This library is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2 of the License, or (at your option) any later version.
+  *
+  * This library is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
+  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+  */
+ #ifndef CPU_DEFS_H
+ #define CPU_DEFS_H
+ 
+ #ifndef NEED_CPU_H
+ #error cpu.h included from common code
+ #endif
+ 
+ #include "config.h"
+ #include <setjmp.h>
+ #include <inttypes.h>
+ #include <signal.h>
+ #include "qemu/osdep.h"
+ #include "qemu/queue.h"
+ #include "exec/hwaddr.h"
+ 
+ #ifndef TARGET_LONG_BITS
+ #error TARGET_LONG_BITS must be defined before including this header
+ #endif
+ 
+ #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
+ 
+ typedef int16_t target_short __attribute__ ((aligned(TARGET_SHORT_ALIGNMENT)));
+ typedef uint16_t target_ushort __attribute__((aligned(TARGET_SHORT_ALIGNMENT)));
+ typedef int32_t target_int __attribute__((aligned(TARGET_INT_ALIGNMENT)));
+ typedef uint32_t target_uint __attribute__((aligned(TARGET_INT_ALIGNMENT)));
+ typedef int64_t target_llong __attribute__((aligned(TARGET_LLONG_ALIGNMENT)));
+ typedef uint64_t target_ullong __attribute__((aligned(TARGET_LLONG_ALIGNMENT)));
+ /* target_ulong is the type of a virtual address */
+ #if TARGET_LONG_SIZE == 4
+ typedef int32_t target_long __attribute__((aligned(TARGET_LONG_ALIGNMENT)));
+ typedef uint32_t target_ulong __attribute__((aligned(TARGET_LONG_ALIGNMENT)));
+ #define TARGET_FMT_lx "%08x"
+ #define TARGET_FMT_ld "%d"
+ #define TARGET_FMT_lu "%u"
+ #elif TARGET_LONG_SIZE == 8
+ typedef int64_t target_long __attribute__((aligned(TARGET_LONG_ALIGNMENT)));
+ typedef uint64_t target_ulong __attribute__((aligned(TARGET_LONG_ALIGNMENT)));
+ #define TARGET_FMT_lx "%016" PRIx64
+ #define TARGET_FMT_ld "%" PRId64
+ #define TARGET_FMT_lu "%" PRIu64
+ #else
+ #error TARGET_LONG_SIZE undefined
+ #endif
+ 
+ #define EXCP_INTERRUPT 	0x10000 /* async interruption */
+ #define EXCP_HLT        0x10001 /* hlt instruction reached */
+ #define EXCP_DEBUG      0x10002 /* cpu stopped after a breakpoint or singlestep */
+ #define EXCP_HALTED     0x10003 /* cpu is halted (waiting for external event) */
+ 
+ #define TB_JMP_CACHE_BITS 12
+ #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
+ 
+ /* Only the bottom TB_JMP_PAGE_BITS of the jump cache hash bits vary for
+    addresses on the same page.  The top bits are the same.  This allows
+    TLB invalidation to quickly clear a subset of the hash table.  */
+ #define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2)
+ #define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS)
+ #define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
+ #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
+ 
+ #if !defined(CONFIG_USER_ONLY)
+ #define CPU_TLB_BITS 8
+ #define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+ 
+ #if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+ #define CPU_TLB_ENTRY_BITS 4
+ #else
+ #define CPU_TLB_ENTRY_BITS 5
+ #endif
+ 
+ typedef struct CPUTLBEntry {
+     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
+        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
+                                     go directly to ram.
+        bit 3                      : indicates that the entry is invalid
+        bit 2..0                   : zero
+     */
+     target_ulong addr_read;
+     target_ulong addr_write;
+     target_ulong addr_code;
+     /* Addend to virtual address to get host address.  IO accesses
+        use the corresponding iotlb value.  */
+     uintptr_t addend;
+     /* padding to get a power of two size */
+     uint8_t dummy[(1 << CPU_TLB_ENTRY_BITS) -
+                   (sizeof(target_ulong) * 3 +
+                    ((-sizeof(target_ulong) * 3) & (sizeof(uintptr_t) - 1)) +
+                    sizeof(uintptr_t))];
+ } CPUTLBEntry;
+ 
+ extern int CPUTLBEntry_wrong_size[sizeof(CPUTLBEntry) == (1 << CPU_TLB_ENTRY_BITS) ? 1 : -1];
+ 
+ #define CPU_COMMON_TLB \
+     /* The meaning of the MMU modes is defined in the target code. */   \
+     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
+     hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];               \
+     target_ulong tlb_flush_addr;                                        \
+     target_ulong tlb_flush_mask;
+ 
+ #else
+ 
+ #define CPU_COMMON_TLB
+ 
+ #endif
+ 
+ 
+ #ifdef HOST_WORDS_BIGENDIAN
+ typedef struct icount_decr_u16 {
+     uint16_t high;
+     uint16_t low;
+ } icount_decr_u16;
+ #else
+ typedef struct icount_decr_u16 {
+     uint16_t low;
+     uint16_t high;
+ } icount_decr_u16;
+ #endif
+ 
 -struct kvm_run;
 -struct KVMState;
+ struct qemu_work_item;
+ 
+ typedef struct CPUBreakpoint {
+     target_ulong pc;
+     int flags; /* BP_* */
+     QTAILQ_ENTRY(CPUBreakpoint) entry;
+ } CPUBreakpoint;
+ 
+ typedef struct CPUWatchpoint {
+     target_ulong vaddr;
+     target_ulong len_mask;
+     int flags; /* BP_* */
+     QTAILQ_ENTRY(CPUWatchpoint) entry;
+ } CPUWatchpoint;
+ 
+ #define CPU_TEMP_BUF_NLONGS 128
+ #define CPU_COMMON                                                      \
+     struct TranslationBlock *current_tb; /* currently executing TB  */  \
+     /* soft mmu support */                                              \
+     /* in order to avoid passing too many arguments to the MMIO         \
+        helpers, we store some rarely used information in the CPU        \
+        context) */                                                      \
+     uintptr_t mem_io_pc; /* host pc at which the memory was             \
+                             accessed */                                 \
+     target_ulong mem_io_vaddr; /* target virtual addr at which the      \
+                                      memory was accessed */             \
+     uint32_t halted; /* Nonzero if the CPU is in suspend state */       \
+     uint32_t interrupt_request;                                         \
+     volatile sig_atomic_t exit_request;                                 \
+     CPU_COMMON_TLB                                                      \
+     struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];           \
+     /* buffer for temporaries in the code generator */                  \
+     long temp_buf[CPU_TEMP_BUF_NLONGS];                                 \
+                                                                         \
+     int64_t icount_extra; /* Instructions until next timer event.  */   \
+     /* Number of cycles left, with interrupt flag in high bit.          \
+        This allows a single read-compare-cbranch-write sequence to test \
+        for both decrementer underflow and exceptions.  */               \
+     union {                                                             \
+         uint32_t u32;                                                   \
+         icount_decr_u16 u16;                                            \
+     } icount_decr;                                                      \
+     uint32_t can_do_io; /* nonzero if memory mapped IO is safe.  */     \
+                                                                         \
+     /* from this point: preserved by CPU reset */                       \
+     /* ice debug support */                                             \
+     QTAILQ_HEAD(breakpoints_head, CPUBreakpoint) breakpoints;            \
+     int singlestep_enabled;                                             \
+                                                                         \
+     QTAILQ_HEAD(watchpoints_head, CPUWatchpoint) watchpoints;            \
+     CPUWatchpoint *watchpoint_hit;                                      \
+                                                                         \
+     struct GDBRegisterState *gdb_regs;                                  \
+                                                                         \
+     /* Core interrupt code */                                           \
+     jmp_buf jmp_env;                                                    \
+     int exception_index;                                                \
+                                                                         \
+     CPUArchState *next_cpu; /* next CPU sharing TB cache */                 \
+     int cpu_index; /* CPU index (informative) */                        \
+     uint32_t host_tid; /* host thread ID */                             \
+     int numa_node; /* NUMA node this cpu is belonging to  */            \
+     int nr_cores;  /* number of cores within this CPU package */        \
+     int nr_threads;/* number of threads within this CPU */              \
+     int running; /* Nonzero if cpu is currently running(usermode).  */  \
+     /* user data */                                                     \
+     void *opaque;                                                       \
+                                                                         \
 -    const char *cpu_model_str;                                          \
 -    struct KVMState *kvm_state;                                         \
 -    struct kvm_run *kvm_run;                                            \
 -    int kvm_fd;                                                         \
 -    int kvm_vcpu_dirty;
++    const char *cpu_model_str;
+ 
+ #endif
diff --cc include/qom/cpu.h
index 0000000,9e9d044..3e9fc3a
mode 000000,100644..100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@@ -1,0 -1,140 +1,151 @@@
+ /*
+  * QEMU CPU model
+  *
+  * Copyright (c) 2012 SUSE LINUX Products GmbH
+  *
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+  * as published by the Free Software Foundation; either version 2
+  * of the License, or (at your option) any later version.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, see
+  * <http://www.gnu.org/licenses/gpl-2.0.html>
+  */
+ #ifndef QEMU_CPU_H
+ #define QEMU_CPU_H
+ 
+ #include "qom/object.h"
+ #include "qemu/thread.h"
+ 
+ /**
+  * SECTION:cpu
+  * @section_id: QEMU-cpu
+  * @title: CPU Class
+  * @short_description: Base class for all CPUs
+  */
+ 
+ #define TYPE_CPU "cpu"
+ 
+ #define CPU(obj) OBJECT_CHECK(CPUState, (obj), TYPE_CPU)
+ #define CPU_CLASS(class) OBJECT_CLASS_CHECK(CPUClass, (class), TYPE_CPU)
+ #define CPU_GET_CLASS(obj) OBJECT_GET_CLASS(CPUClass, (obj), TYPE_CPU)
+ 
+ typedef struct CPUState CPUState;
+ 
+ /**
+  * CPUClass:
+  * @reset: Callback to reset the #CPUState to its initial state.
+  *
+  * Represents a CPU family or model.
+  */
+ typedef struct CPUClass {
+     /*< private >*/
+     ObjectClass parent_class;
+     /*< public >*/
+ 
+     void (*reset)(CPUState *cpu);
+ } CPUClass;
+ 
++struct KVMState;
++struct kvm_run;
++
+ /**
+  * CPUState:
+  * @created: Indicates whether the CPU thread has been successfully created.
+  * @stop: Indicates a pending stop request.
+  * @stopped: Indicates the CPU has been artificially stopped.
++ * @kvm_fd: vCPU file descriptor for KVM.
+  *
+  * State of one CPU core or thread.
+  */
+ struct CPUState {
+     /*< private >*/
+     Object parent_obj;
+     /*< public >*/
+ 
+     struct QemuThread *thread;
+ #ifdef _WIN32
+     HANDLE hThread;
+ #endif
+     int thread_id;
+     struct QemuCond *halt_cond;
+     struct qemu_work_item *queued_work_first, *queued_work_last;
+     bool thread_kicked;
+     bool created;
+     bool stop;
+     bool stopped;
+ 
++#if !defined(CONFIG_USER_ONLY)
++    int kvm_fd;
++    bool kvm_vcpu_dirty;
++#endif
++    struct KVMState *kvm_state;
++    struct kvm_run *kvm_run;
++
+     /* TODO Move common fields from CPUArchState here. */
+ };
+ 
+ 
+ /**
+  * cpu_reset:
+  * @cpu: The CPU whose state is to be reset.
+  */
+ void cpu_reset(CPUState *cpu);
+ 
+ /**
+  * qemu_cpu_has_work:
+  * @cpu: The vCPU to check.
+  *
+  * Checks whether the CPU has work to do.
+  *
+  * Returns: %true if the CPU has work, %false otherwise.
+  */
+ bool qemu_cpu_has_work(CPUState *cpu);
+ 
+ /**
+  * qemu_cpu_is_self:
+  * @cpu: The vCPU to check against.
+  *
+  * Checks whether the caller is executing on the vCPU thread.
+  *
+  * Returns: %true if called from @cpu's thread, %false otherwise.
+  */
+ bool qemu_cpu_is_self(CPUState *cpu);
+ 
+ /**
+  * qemu_cpu_kick:
+  * @cpu: The vCPU to kick.
+  *
+  * Kicks @cpu's thread.
+  */
+ void qemu_cpu_kick(CPUState *cpu);
+ 
+ /**
+  * cpu_is_stopped:
+  * @cpu: The CPU to check.
+  *
+  * Checks whether the CPU is stopped.
+  *
+  * Returns: %true if run state is not running or if artificially stopped;
+  * %false otherwise.
+  */
+ bool cpu_is_stopped(CPUState *cpu);
+ 
+ /**
+  * run_on_cpu:
+  * @cpu: The vCPU to run on.
+  * @func: The function to be executed.
+  * @data: Data to pass to the function.
+  *
+  * Schedules the function @func for execution on the vCPU @cpu.
+  */
+ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+ 
+ 
+ #endif
diff --cc include/sysemu/kvm.h
index 0000000,131d2bd..3db19ff
mode 000000,100644..100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@@ -1,0 -1,280 +1,280 @@@
+ /*
+  * QEMU KVM support
+  *
+  * Copyright IBM, Corp. 2008
+  *
+  * Authors:
+  *  Anthony Liguori   <aliguori at us.ibm.com>
+  *
+  * This work is licensed under the terms of the GNU GPL, version 2 or later.
+  * See the COPYING file in the top-level directory.
+  *
+  */
+ 
+ #ifndef QEMU_KVM_H
+ #define QEMU_KVM_H
+ 
+ #include <errno.h>
+ #include "config-host.h"
+ #include "qemu/queue.h"
+ 
+ #ifdef CONFIG_KVM
+ #include <linux/kvm.h>
+ #include <linux/kvm_para.h>
+ #endif
+ 
+ extern int kvm_allowed;
+ extern bool kvm_kernel_irqchip;
+ extern bool kvm_async_interrupts_allowed;
+ extern bool kvm_irqfds_allowed;
+ extern bool kvm_msi_via_irqfd_allowed;
+ extern bool kvm_gsi_routing_allowed;
+ 
+ #if defined CONFIG_KVM || !defined NEED_CPU_H
+ #define kvm_enabled()           (kvm_allowed)
+ /**
+  * kvm_irqchip_in_kernel:
+  *
+  * Returns: true if the user asked us to create an in-kernel
+  * irqchip via the "kernel_irqchip=on" machine option.
+  * What this actually means is architecture and machine model
+  * specific: on PC, for instance, it means that the LAPIC,
+  * IOAPIC and PIT are all in kernel. This function should never
+  * be used from generic target-independent code: use one of the
+  * following functions or some other specific check instead.
+  */
+ #define kvm_irqchip_in_kernel() (kvm_kernel_irqchip)
+ 
+ /**
+  * kvm_async_interrupts_enabled:
+  *
+  * Returns: true if we can deliver interrupts to KVM
+  * asynchronously (ie by ioctl from any thread at any time)
+  * rather than having to do interrupt delivery synchronously
+  * (where the vcpu must be stopped at a suitable point first).
+  */
+ #define kvm_async_interrupts_enabled() (kvm_async_interrupts_allowed)
+ 
+ /**
+  * kvm_irqfds_enabled:
+  *
+  * Returns: true if we can use irqfds to inject interrupts into
+  * a KVM CPU (ie the kernel supports irqfds and we are running
+  * with a configuration where it is meaningful to use them).
+  */
+ #define kvm_irqfds_enabled() (kvm_irqfds_allowed)
+ 
+ /**
+  * kvm_msi_via_irqfd_enabled:
+  *
+  * Returns: true if we can route a PCI MSI (Message Signaled Interrupt)
+  * to a KVM CPU via an irqfd. This requires that the kernel supports
+  * this and that we're running in a configuration that permits it.
+  */
+ #define kvm_msi_via_irqfd_enabled() (kvm_msi_via_irqfd_allowed)
+ 
+ /**
+  * kvm_gsi_routing_enabled:
+  *
+  * Returns: true if GSI routing is enabled (ie the kernel supports
+  * it and we're running in a configuration that permits it).
+  */
+ #define kvm_gsi_routing_enabled() (kvm_gsi_routing_allowed)
+ 
+ #else
+ #define kvm_enabled()           (0)
+ #define kvm_irqchip_in_kernel() (false)
+ #define kvm_async_interrupts_enabled() (false)
+ #define kvm_irqfds_enabled() (false)
+ #define kvm_msi_via_irqfd_enabled() (false)
+ #define kvm_gsi_routing_enabled() (false) /* CONFIG_KVM unset: always false */
+ #endif
+ 
+ struct kvm_run;
+ struct kvm_lapic_state;
+ 
+ typedef struct KVMCapabilityInfo {
+     const char *name;
+     int value;
+ } KVMCapabilityInfo;
+ 
+ #define KVM_CAP_INFO(CAP) { "KVM_CAP_" stringify(CAP), KVM_CAP_##CAP }
+ #define KVM_CAP_LAST_INFO { NULL, 0 }
+ 
+ struct KVMState;
+ typedef struct KVMState KVMState;
+ extern KVMState *kvm_state;
+ 
+ /* external API */
+ 
+ int kvm_init(void);
+ 
+ int kvm_has_sync_mmu(void);
+ int kvm_has_vcpu_events(void);
+ int kvm_has_robust_singlestep(void);
+ int kvm_has_debugregs(void);
+ int kvm_has_xsave(void);
+ int kvm_has_xcrs(void);
+ int kvm_has_pit_state2(void);
+ int kvm_has_many_ioeventfds(void);
+ int kvm_has_gsi_routing(void);
+ int kvm_has_intx_set_mask(void);
+ 
+ #ifdef NEED_CPU_H
+ int kvm_init_vcpu(CPUArchState *env);
+ 
+ int kvm_cpu_exec(CPUArchState *env);
+ 
+ #if !defined(CONFIG_USER_ONLY)
+ void *kvm_vmalloc(ram_addr_t size);
+ void *kvm_arch_vmalloc(ram_addr_t size);
+ void kvm_setup_guest_memory(void *start, size_t size);
+ 
+ void kvm_flush_coalesced_mmio_buffer(void);
+ #endif
+ 
+ int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
+                           target_ulong len, int type);
+ int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
+                           target_ulong len, int type);
+ void kvm_remove_all_breakpoints(CPUArchState *current_env);
+ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap);
+ #ifndef _WIN32
+ int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset);
+ #endif
+ 
+ int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr);
+ int kvm_on_sigbus(int code, void *addr);
+ 
+ /* internal API */
+ 
+ int kvm_ioctl(KVMState *s, int type, ...);
+ 
+ int kvm_vm_ioctl(KVMState *s, int type, ...);
+ 
 -int kvm_vcpu_ioctl(CPUArchState *env, int type, ...);
++int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
+ 
+ /* Arch specific hooks */
+ 
+ extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
+ 
 -void kvm_arch_pre_run(CPUArchState *env, struct kvm_run *run);
 -void kvm_arch_post_run(CPUArchState *env, struct kvm_run *run);
++void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run);
++void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run);
+ 
 -int kvm_arch_handle_exit(CPUArchState *env, struct kvm_run *run);
++int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run);
+ 
 -int kvm_arch_process_async_events(CPUArchState *env);
++int kvm_arch_process_async_events(CPUState *cpu);
+ 
 -int kvm_arch_get_registers(CPUArchState *env);
++int kvm_arch_get_registers(CPUState *cpu);
+ 
+ /* state subset only touched by the VCPU itself during runtime */
+ #define KVM_PUT_RUNTIME_STATE   1
+ /* state subset modified during VCPU reset */
+ #define KVM_PUT_RESET_STATE     2
+ /* full state set, modified during initialization or on vmload */
+ #define KVM_PUT_FULL_STATE      3
+ 
 -int kvm_arch_put_registers(CPUArchState *env, int level);
++int kvm_arch_put_registers(CPUState *cpu, int level);
+ 
+ int kvm_arch_init(KVMState *s);
+ 
 -int kvm_arch_init_vcpu(CPUArchState *env);
++int kvm_arch_init_vcpu(CPUState *cpu);
+ 
 -void kvm_arch_reset_vcpu(CPUArchState *env);
++void kvm_arch_reset_vcpu(CPUState *cpu);
+ 
 -int kvm_arch_on_sigbus_vcpu(CPUArchState *env, int code, void *addr);
++int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+ int kvm_arch_on_sigbus(int code, void *addr);
+ 
+ void kvm_arch_init_irq_routing(KVMState *s);
+ 
+ int kvm_set_irq(KVMState *s, int irq, int level);
+ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
+ 
+ void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin);
+ 
+ void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
+ void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
+ 
+ struct kvm_guest_debug;
+ struct kvm_debug_exit_arch;
+ 
+ struct kvm_sw_breakpoint {
+     target_ulong pc;
+     target_ulong saved_insn;
+     int use_count;
+     QTAILQ_ENTRY(kvm_sw_breakpoint) entry;
+ };
+ 
+ QTAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint);
+ 
 -struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
++struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
+                                                  target_ulong pc);
+ 
 -int kvm_sw_breakpoints_active(CPUArchState *env);
++int kvm_sw_breakpoints_active(CPUState *cpu);
+ 
 -int kvm_arch_insert_sw_breakpoint(CPUArchState *current_env,
++int kvm_arch_insert_sw_breakpoint(CPUState *current_cpu,
+                                   struct kvm_sw_breakpoint *bp);
 -int kvm_arch_remove_sw_breakpoint(CPUArchState *current_env,
++int kvm_arch_remove_sw_breakpoint(CPUState *current_cpu,
+                                   struct kvm_sw_breakpoint *bp);
+ int kvm_arch_insert_hw_breakpoint(target_ulong addr,
+                                   target_ulong len, int type);
+ int kvm_arch_remove_hw_breakpoint(target_ulong addr,
+                                   target_ulong len, int type);
+ void kvm_arch_remove_all_hw_breakpoints(void);
+ 
 -void kvm_arch_update_guest_debug(CPUArchState *env, struct kvm_guest_debug *dbg);
++void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg);
+ 
 -bool kvm_arch_stop_on_emulation_error(CPUArchState *env);
++bool kvm_arch_stop_on_emulation_error(CPUState *cpu);
+ 
+ int kvm_check_extension(KVMState *s, unsigned int extension);
+ 
+ uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
+                                       uint32_t index, int reg);
+ void kvm_cpu_synchronize_state(CPUArchState *env);
+ void kvm_cpu_synchronize_post_reset(CPUArchState *env);
+ void kvm_cpu_synchronize_post_init(CPUArchState *env);
+ 
+ /* generic hooks - to be moved/refactored once there are more users */
+ 
+ static inline void cpu_synchronize_state(CPUArchState *env)
+ {
+     if (kvm_enabled()) {
+         kvm_cpu_synchronize_state(env);
+     }
+ }
+ 
+ static inline void cpu_synchronize_post_reset(CPUArchState *env)
+ {
+     if (kvm_enabled()) {
+         kvm_cpu_synchronize_post_reset(env);
+     }
+ }
+ 
+ static inline void cpu_synchronize_post_init(CPUArchState *env)
+ {
+     if (kvm_enabled()) {
+         kvm_cpu_synchronize_post_init(env);
+     }
+ }
+ 
+ 
+ #if !defined(CONFIG_USER_ONLY)
+ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
+                                        hwaddr *phys_addr);
+ #endif
+ 
+ #endif
+ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign,
+                            uint32_t size);
+ 
+ int kvm_set_ioeventfd_pio_word(int fd, uint16_t adr, uint16_t val, bool assign);
+ 
+ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg);
+ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg);
+ void kvm_irqchip_release_virq(KVMState *s, int virq);
+ 
+ int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+ int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+ void kvm_pc_gsi_handler(void *opaque, int n, int level);
+ void kvm_pc_setup_irq_routing(bool pci_enabled);
+ #endif
diff --cc target-alpha/cpu.c
index 59d8669,11a19eb..212a625
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@@ -21,211 -21,8 +21,211 @@@
  
  #include "cpu.h"
  #include "qemu-common.h"
- #include "error.h"
++#include "qapi/error.h"
  
  
 +static void alpha_cpu_realize(Object *obj, Error **errp)
 +{
 +#ifndef CONFIG_USER_ONLY
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +
 +    qemu_init_vcpu(&cpu->env);
 +#endif
 +}
 +
 +/* Sort alphabetically by type name. */
 +static gint alpha_cpu_list_compare(gconstpointer a, gconstpointer b)
 +{
 +    ObjectClass *class_a = (ObjectClass *)a;
 +    ObjectClass *class_b = (ObjectClass *)b;
 +    const char *name_a, *name_b;
 +
 +    name_a = object_class_get_name(class_a);
 +    name_b = object_class_get_name(class_b);
 +    return strcmp(name_a, name_b);
 +}
 +
 +static void alpha_cpu_list_entry(gpointer data, gpointer user_data)
 +{
 +    ObjectClass *oc = data;
 +    CPUListState *s = user_data;
 +
 +    (*s->cpu_fprintf)(s->file, "  %s\n",
 +                      object_class_get_name(oc));
 +}
 +
 +void alpha_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 +{
 +    CPUListState s = {
 +        .file = f,
 +        .cpu_fprintf = cpu_fprintf,
 +    };
 +    GSList *list;
 +
 +    list = object_class_get_list(TYPE_ALPHA_CPU, false);
 +    list = g_slist_sort(list, alpha_cpu_list_compare);
 +    (*cpu_fprintf)(f, "Available CPUs:\n");
 +    g_slist_foreach(list, alpha_cpu_list_entry, &s);
 +    g_slist_free(list);
 +}
 +
 +/* Models */
 +
 +#define TYPE(model) model "-" TYPE_ALPHA_CPU
 +
 +typedef struct AlphaCPUAlias {
 +    const char *alias;
 +    const char *typename;
 +} AlphaCPUAlias;
 +
 +static const AlphaCPUAlias alpha_cpu_aliases[] = {
 +    { "21064",   TYPE("ev4") },
 +    { "21164",   TYPE("ev5") },
 +    { "21164a",  TYPE("ev56") },
 +    { "21164pc", TYPE("pca56") },
 +    { "21264",   TYPE("ev6") },
 +    { "21264a",  TYPE("ev67") },
 +};
 +
 +static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
 +{
 +    ObjectClass *oc = NULL;
 +    char *typename;
 +    int i;
 +
 +    if (cpu_model == NULL) {
 +        return NULL;
 +    }
 +
 +    oc = object_class_by_name(cpu_model);
 +    if (oc != NULL) {
 +        return oc;
 +    }
 +
 +    for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) {
 +        if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) {
 +            oc = object_class_by_name(alpha_cpu_aliases[i].typename);
 +            assert(oc != NULL);
 +            return oc;
 +        }
 +    }
 +
 +    typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model);
 +    oc = object_class_by_name(typename);
 +    g_free(typename);
 +    return oc;
 +}
 +
 +AlphaCPU *cpu_alpha_init(const char *cpu_model)
 +{
 +    AlphaCPU *cpu;
 +    CPUAlphaState *env;
 +    ObjectClass *cpu_class;
 +
 +    cpu_class = alpha_cpu_class_by_name(cpu_model);
 +    if (cpu_class == NULL) {
 +        /* Default to ev67; no reason not to emulate insns by default.  */
 +        cpu_class = object_class_by_name(TYPE("ev67"));
 +    }
 +    cpu = ALPHA_CPU(object_new(object_class_get_name(cpu_class)));
 +    env = &cpu->env;
 +
 +    env->cpu_model_str = cpu_model;
 +
 +    alpha_cpu_realize(OBJECT(cpu), NULL);
 +    return cpu;
 +}
 +
 +static void ev4_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->implver = IMPLVER_2106x;
 +}
 +
 +static const TypeInfo ev4_cpu_type_info = {
 +    .name = TYPE("ev4"),
 +    .parent = TYPE_ALPHA_CPU,
 +    .instance_init = ev4_cpu_initfn,
 +};
 +
 +static void ev5_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->implver = IMPLVER_21164;
 +}
 +
 +static const TypeInfo ev5_cpu_type_info = {
 +    .name = TYPE("ev5"),
 +    .parent = TYPE_ALPHA_CPU,
 +    .instance_init = ev5_cpu_initfn,
 +};
 +
 +static void ev56_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->amask |= AMASK_BWX;
 +}
 +
 +static const TypeInfo ev56_cpu_type_info = {
 +    .name = TYPE("ev56"),
 +    .parent = TYPE("ev5"),
 +    .instance_init = ev56_cpu_initfn,
 +};
 +
 +static void pca56_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->amask |= AMASK_MVI;
 +}
 +
 +static const TypeInfo pca56_cpu_type_info = {
 +    .name = TYPE("pca56"),
 +    .parent = TYPE("ev56"),
 +    .instance_init = pca56_cpu_initfn,
 +};
 +
 +static void ev6_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->implver = IMPLVER_21264;
 +    env->amask = AMASK_BWX | AMASK_FIX | AMASK_MVI | AMASK_TRAP;
 +}
 +
 +static const TypeInfo ev6_cpu_type_info = {
 +    .name = TYPE("ev6"),
 +    .parent = TYPE_ALPHA_CPU,
 +    .instance_init = ev6_cpu_initfn,
 +};
 +
 +static void ev67_cpu_initfn(Object *obj)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(obj);
 +    CPUAlphaState *env = &cpu->env;
 +
 +    env->amask |= AMASK_CIX | AMASK_PREFETCH;
 +}
 +
 +static const TypeInfo ev67_cpu_type_info = {
 +    .name = TYPE("ev67"),
 +    .parent = TYPE("ev6"),
 +    .instance_init = ev67_cpu_initfn,
 +};
 +
 +static const TypeInfo ev68_cpu_type_info = {
 +    .name = TYPE("ev68"),
 +    .parent = TYPE("ev67"),
 +};
 +
  static void alpha_cpu_initfn(Object *obj)
  {
      AlphaCPU *cpu = ALPHA_CPU(obj);
diff --cc target-alpha/translate.c
index dc0c97c,3afc3c6..5cb40b7
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@@ -88,9 -88,9 +88,9 @@@ static TCGv cpu_usp
  /* register names */
  static char cpu_reg_names[10*4+21*5 + 10*5+21*6];
  
- #include "gen-icount.h"
+ #include "exec/gen-icount.h"
  
 -static void alpha_translate_init(void)
 +void alpha_translate_init(void)
  {
      int i;
      char *p;
commit 36f25d2537c40c6c47f4abee5d31a24863d1adf7
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Thu Dec 20 00:04:09 2012 +0400

    target-xtensa: fix search_pc for the last TB opcode
    
    Zero out tcg_ctx.gen_opc_instr_start for instructions representing the
    last guest opcode in the TB.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 0a03729..7029ac4 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -3005,7 +3005,11 @@ static void gen_intermediate_code_internal(
     gen_icount_end(tb, insn_count);
     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
 
-    if (!search_pc) {
+    if (search_pc) {
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
+        memset(tcg_ctx.gen_opc_instr_start + lj + 1, 0,
+                (j - lj) * sizeof(tcg_ctx.gen_opc_instr_start[0]));
+    } else {
         tb->size = dc.pc - pc_start;
         tb->icount = insn_count;
     }
commit 3f124b687462ce3140d963a024705a89cdc8cee8
Author: Liming Wang <walimisdev at gmail.com>
Date:   Fri Dec 21 16:56:58 2012 +0800

    net: add missing include file
    
    To fix building error:
    
    CC    net/vde.o
    net/vde.c: In function ‘vde_cleanup’:
    net/vde.c:65:5: error: implicit declaration of function ‘qemu_set_fd_handler’ [-Werror=implicit-function-declaration]
    net/vde.c:65:5: error: nested extern declaration of ‘qemu_set_fd_handler’ [-Werror=nested-externs]
    cc1: all warnings being treated as errors
    
    Signed-off-by: Liming Wang <walimisdev at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/net/vde.c b/net/vde.c
index 754a141..4dea32d 100644
--- a/net/vde.c
+++ b/net/vde.c
@@ -29,6 +29,7 @@
 #include "clients.h"
 #include "qemu-common.h"
 #include "qemu/option.h"
+#include "qemu/main-loop.h"
 
 typedef struct VDEState {
     NetClientState nc;
commit 0c884d1659f02b4a0c704c2344f42e3fabb1f193
Author: 陳韋任 (Wei-Ren Chen) <chenwj at iis.sinica.edu.tw>
Date:   Thu Dec 20 09:39:16 2012 +0800

    translate-all.c: Use tb1->phys_hash_next directly in tb_remove
    
      When tb_remove was first committed at fd6ce8f6, there were three different
    calls passing different names to offsetof. In current codebase, the other two
    calls are replaced with tb_page_remove. There is no need to have a general
    tb_remove. Omit passing the third parameter and using tb1->phys_hash_next
    directly.
    
    Signed-off-by: Chen Wei-Ren <chenwj at iis.sinica.edu.tw>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/translate-all.c b/translate-all.c
index b621748..d367fc4 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -759,19 +759,17 @@ static void tb_page_check(void)
 
 #endif
 
-/* invalidate one TB */
-static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
-                             int next_offset)
+static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb)
 {
     TranslationBlock *tb1;
 
     for (;;) {
         tb1 = *ptb;
         if (tb1 == tb) {
-            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
+            *ptb = tb1->phys_hash_next;
             break;
         }
-        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
+        ptb = &tb1->phys_hash_next;
     }
 }
 
@@ -828,6 +826,7 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
     tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
 }
 
+/* invalidate one TB */
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 {
     CPUArchState *env;
@@ -839,8 +838,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
-    tb_remove(&tb_phys_hash[h], tb,
-              offsetof(TranslationBlock, phys_hash_next));
+    tb_hash_remove(&tb_phys_hash[h], tb);
 
     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
commit b2136140f68ce05122f611eb9cde4f0365ab6a00
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Dec 20 23:09:53 2012 +0100

    net: Add missing include statement (fix compiler warnings for MinGW)
    
    These and some more compiler warnings were caused by a recent commit:
    
    net/tap-win32.c:724: warning: no previous prototype for ‘tap_has_ufo’
    net/tap-win32.c:729: warning: no previous prototype for ‘tap_has_vnet_hdr’
    ...
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/net/tap-win32.c b/net/tap-win32.c
index 0c63cbd..265369c 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -31,6 +31,7 @@
 #include "qemu-common.h"
 #include "clients.h"            /* net_init_tap */
 #include "net/net.h"
+#include "net/tap.h"            /* tap_has_ufo, ... */
 #include "sysemu/sysemu.h"
 #include "qemu/error-report.h"
 #include <stdio.h>
commit 4ad549e89e55fb48eb6feb783ee4a9ede1dea52e
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Dec 20 12:29:20 2012 +0100

    xen: add missing include
    
    xen-all needs to access CharDeviceState's filename field, so
    it needs to include char/char.h.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Tested-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/xen-all.c b/xen-all.c
index 50edaec..19bcfd1 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -16,6 +16,7 @@
 #include "hw/xen_backend.h"
 #include "qmp-commands.h"
 
+#include "char/char.h"
 #include "qemu/range.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
commit ca273d58d8a77d5cc9d42440bcdf9d7cad2054bc
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Dec 20 12:29:19 2012 +0100

    build: fix includes for VNC
    
    vnc-tls.h is included by vnc.h, and it includes gnutls/gnutls.h.
    Hence, GnuTLS header files are needed by all files that include
    vnc.h, most notably qmp.c.  Move these flags to QEMU_CFLAGS for
    simplicity.
    
    Reported-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/Makefile.target b/Makefile.target
index 8bbad38..be8b8b8 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -120,11 +120,6 @@ obj-$(CONFIG_NO_GET_MEMORY_MAPPING) += memory_mapping-stub.o
 obj-$(CONFIG_NO_CORE_DUMP) += dump-stub.o
 LIBS+=-lz
 
-QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
-QEMU_CFLAGS += $(VNC_SASL_CFLAGS)
-QEMU_CFLAGS += $(VNC_JPEG_CFLAGS)
-QEMU_CFLAGS += $(VNC_PNG_CFLAGS)
-
 # xen support
 obj-$(CONFIG_XEN) += xen-all.o xen-mapcache.o
 obj-$(CONFIG_NO_XEN) += xen-stub.o
diff --git a/configure b/configure
index b101d5c..14f05c7 100755
--- a/configure
+++ b/configure
@@ -1712,6 +1712,7 @@ EOF
   if compile_prog "$vnc_tls_cflags" "$vnc_tls_libs" ; then
     vnc_tls=yes
     libs_softmmu="$vnc_tls_libs $libs_softmmu"
+    QEMU_CFLAGS="$QEMU_CFLAGS $vnc_tls_cflags"
   else
     if test "$vnc_tls" = "yes" ; then
       feature_not_found "vnc-tls"
@@ -1734,6 +1735,7 @@ EOF
   if compile_prog "$vnc_sasl_cflags" "$vnc_sasl_libs" ; then
     vnc_sasl=yes
     libs_softmmu="$vnc_sasl_libs $libs_softmmu"
+    QEMU_CFLAGS="$QEMU_CFLAGS $vnc_sasl_cflags"
   else
     if test "$vnc_sasl" = "yes" ; then
       feature_not_found "vnc-sasl"
@@ -1755,6 +1757,7 @@ EOF
   if compile_prog "$vnc_jpeg_cflags" "$vnc_jpeg_libs" ; then
     vnc_jpeg=yes
     libs_softmmu="$vnc_jpeg_libs $libs_softmmu"
+    QEMU_CFLAGS="$QEMU_CFLAGS $vnc_jpeg_cflags"
   else
     if test "$vnc_jpeg" = "yes" ; then
       feature_not_found "vnc-jpeg"
@@ -3377,19 +3380,15 @@ if test "$vnc" = "yes" ; then
 fi
 if test "$vnc_tls" = "yes" ; then
   echo "CONFIG_VNC_TLS=y" >> $config_host_mak
-  echo "VNC_TLS_CFLAGS=$vnc_tls_cflags" >> $config_host_mak
 fi
 if test "$vnc_sasl" = "yes" ; then
   echo "CONFIG_VNC_SASL=y" >> $config_host_mak
-  echo "VNC_SASL_CFLAGS=$vnc_sasl_cflags" >> $config_host_mak
 fi
 if test "$vnc_jpeg" = "yes" ; then
   echo "CONFIG_VNC_JPEG=y" >> $config_host_mak
-  echo "VNC_JPEG_CFLAGS=$vnc_jpeg_cflags" >> $config_host_mak
 fi
 if test "$vnc_png" = "yes" ; then
   echo "CONFIG_VNC_PNG=y" >> $config_host_mak
-  echo "VNC_PNG_CFLAGS=$vnc_png_cflags" >> $config_host_mak
 fi
 if test "$fnmatch" = "yes" ; then
   echo "CONFIG_FNMATCH=y" >> $config_host_mak
diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index dc8f0e4..6768bb7 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs
@@ -14,6 +14,5 @@ common-obj-$(CONFIG_CURSES) += curses.o
 common-obj-$(CONFIG_VNC) += $(vnc-obj-y)
 
 $(obj)/sdl.o $(obj)/sdl_zoom.o: QEMU_CFLAGS += $(SDL_CFLAGS) 
-$(obj)/vnc.o: QEMU_CFLAGS += $(VNC_TLS_CFLAGS)
 
 $(obj)/cocoa.o: $(SRC_PATH)/$(obj)/cocoa.m
commit bb5801f551ee8591d576d87a9290af297998e322
Merge: 914606d 9848a40
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Fri Dec 21 07:53:48 2012 -0600

    Merge remote-tracking branch 'quintela/thread-20121220.next' into staging
    
    * quintela/thread-20121220.next: (79 commits)
      migration: merge QEMUFileBuffered into MigrationState
      migration: fix qemu_get_fd for BufferedFile
      ram: refactor ram_save_block() return value
      ram: account the amount of transferred ram better
      ram: optimize migration bitmap walking
      ram: Use memory_region_test_and_clear_dirty
      memory: introduce memory_region_test_and_clear_dirty
      ram: Add last_sent_block
      ram: rename last_block to last_seen_block
      migration: move migration notifier
      migration: Inline qemu_fopen_ops_buffered into migrate_fd_connect
      migration: move migration_fd_put_ready()
      migration: add XFER_LIMIT_RATIO
      migration: move buffered_file.c code into migration.c
      savevm: New save live migration method: pending
      buffered_file: unfold buffered_append in buffered_put_buffer
      buffered_file: don't flush on put buffer
      buffered_file: Unfold the trick to restart generating migration data
      migration: just lock migrate_fd_put_ready
      migration: remove unfreeze logic
      ...
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 9848a40427cd76628d04d918fa4751c542527915
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Dec 19 09:55:50 2012 +0100

    migration: merge QEMUFileBuffered into MigrationState
    
    Avoid splitting the state of outgoing migration, more or less arbitrarily,
    between two data structures.  QEMUFileBuffered anyway is used only during
    migration.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 0c9bf8b..2d5b630 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -16,6 +16,7 @@
 
 #include "qapi/qmp/qdict.h"
 #include "qemu-common.h"
+#include "qemu/thread.h"
 #include "qemu/notify.h"
 #include "qapi/error.h"
 #include "migration/vmstate.h"
@@ -31,6 +32,13 @@ typedef struct MigrationState MigrationState;
 struct MigrationState
 {
     int64_t bandwidth_limit;
+    size_t bytes_xfer;
+    size_t xfer_limit;
+    uint8_t *buffer;
+    size_t buffer_size;
+    size_t buffer_capacity;
+    QemuThread thread;
+
     QEMUFile *file;
     int fd;
     int state;
diff --git a/migration.c b/migration.c
index 3fcc1c8..c69e864 100644
--- a/migration.c
+++ b/migration.c
@@ -521,18 +521,8 @@ int64_t migrate_xbzrle_cache_size(void)
 
 /* migration thread support */
 
-typedef struct QEMUFileBuffered {
-    MigrationState *migration_state;
-    QEMUFile *file;
-    size_t bytes_xfer;
-    size_t xfer_limit;
-    uint8_t *buffer;
-    size_t buffer_size;
-    size_t buffer_capacity;
-    QemuThread thread;
-} QEMUFileBuffered;
-
-static ssize_t buffered_flush(QEMUFileBuffered *s)
+
+static ssize_t buffered_flush(MigrationState *s)
 {
     size_t offset = 0;
     ssize_t ret = 0;
@@ -541,8 +531,7 @@ static ssize_t buffered_flush(QEMUFileBuffered *s)
 
     while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
         size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
-        ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
-                                    to_send);
+        ret = migrate_fd_put_buffer(s, s->buffer + offset, to_send);
         if (ret <= 0) {
             DPRINTF("error flushing data, %zd\n", ret);
             break;
@@ -566,7 +555,7 @@ static ssize_t buffered_flush(QEMUFileBuffered *s)
 static int buffered_put_buffer(void *opaque, const uint8_t *buf,
                                int64_t pos, int size)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
     ssize_t error;
 
     DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
@@ -598,7 +587,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf,
 
 static int buffered_close(void *opaque)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
     ssize_t ret = 0;
     int ret2;
 
@@ -612,20 +601,20 @@ static int buffered_close(void *opaque)
         }
     }
 
-    ret2 = migrate_fd_close(s->migration_state);
+    ret2 = migrate_fd_close(s);
     if (ret >= 0) {
         ret = ret2;
     }
-    ret = migrate_fd_close(s->migration_state);
-    s->migration_state->complete = true;
+    ret = migrate_fd_close(s);
+    s->complete = true;
     return ret;
 }
 
 static int buffered_get_fd(void *opaque)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
 
-    return s->migration_state->fd;
+    return s->fd;
 }
 
 /*
@@ -636,7 +625,7 @@ static int buffered_get_fd(void *opaque)
  */
 static int buffered_rate_limit(void *opaque)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
     int ret;
 
     ret = qemu_file_get_error(s->file);
@@ -653,7 +642,7 @@ static int buffered_rate_limit(void *opaque)
 
 static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
     if (qemu_file_get_error(s->file)) {
         goto out;
     }
@@ -669,7 +658,7 @@ out:
 
 static int64_t buffered_get_rate_limit(void *opaque)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
 
     return s->xfer_limit;
 }
@@ -741,7 +730,7 @@ static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size)
 
 static void *buffered_file_thread(void *opaque)
 {
-    QEMUFileBuffered *s = opaque;
+    MigrationState *s = opaque;
     int64_t initial_time = qemu_get_clock_ms(rt_clock);
     int64_t max_size = 0;
     bool last_round = false;
@@ -749,7 +738,7 @@ static void *buffered_file_thread(void *opaque)
     while (true) {
         int64_t current_time = qemu_get_clock_ms(rt_clock);
 
-        if (s->migration_state->complete) {
+        if (s->complete) {
             break;
         }
         if (current_time >= initial_time + BUFFER_DELAY) {
@@ -776,12 +765,11 @@ static void *buffered_file_thread(void *opaque)
         DPRINTF("file is ready\n");
         if (s->bytes_xfer < s->xfer_limit) {
             DPRINTF("notifying client\n");
-            last_round = migrate_fd_put_ready(s->migration_state, max_size);
+            last_round = migrate_fd_put_ready(s, max_size);
         }
     }
 
     g_free(s->buffer);
-    g_free(s);
     return NULL;
 }
 
@@ -794,22 +782,21 @@ static const QEMUFileOps buffered_file_ops = {
     .set_rate_limit = buffered_set_rate_limit,
 };
 
-void migrate_fd_connect(MigrationState *migration_state)
+void migrate_fd_connect(MigrationState *s)
 {
-    QEMUFileBuffered *s;
+    s->state = MIG_STATE_ACTIVE;
+    s->bytes_xfer = 0;
+    s->buffer = NULL;
+    s->buffer_size = 0;
+    s->buffer_capacity = 0;
 
-    migration_state->state = MIG_STATE_ACTIVE;
-    migration_state->first_time = true;
-    s = g_malloc0(sizeof(*s));
+    s->first_time = true;
 
-    s->migration_state = migration_state;
-    s->xfer_limit = s->migration_state->bandwidth_limit / XFER_LIMIT_RATIO;
-    s->migration_state->complete = false;
+    s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO;
+    s->complete = false;
 
     s->file = qemu_fopen_ops(s, &buffered_file_ops);
 
-    migration_state->file = s->file;
-
     qemu_thread_create(&s->thread, buffered_file_thread, s,
                        QEMU_THREAD_DETACHED);
     notifier_list_notify(&migration_state_notifiers, s);
commit e659586e63793a8a61efc4a658e2908ac8a2e935
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Nov 8 00:42:50 2012 +0100

    migration: fix qemu_get_fd for BufferedFile
    
    Not really used, but nice to have it correct. :)
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration.c b/migration.c
index 596aca7..3fcc1c8 100644
--- a/migration.c
+++ b/migration.c
@@ -625,7 +625,7 @@ static int buffered_get_fd(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
 
-    return qemu_get_fd(s->file);
+    return s->migration_state->fd;
 }
 
 /*
commit b823ceaadfaad65f3a034ada394b33ca1bf1a914
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Dec 10 13:27:50 2012 +0100

    ram: refactor ram_save_block() return value
    
    It could only return 0 if we only found dirty xbzrle pages that hadn't
    changed (i.e. they were written with the same content).  We don't care
    about that case, it is the same than nothing dirty.
    
    So now the return of the function is how much have it written, nothing
    else. Adjust callers.
    
    And we also made ram_save_iterate() return the number of transferred
    bytes, not the number of transferred pages.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 4015bfd..86f8544 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -422,9 +422,8 @@ static void migration_bitmap_sync(void)
 /*
  * ram_save_block: Writes a page of memory to the stream f
  *
- * Returns:  0: if the page hasn't changed
- *          -1: if there are no more dirty pages
- *           n: the amount of bytes written in other case
+ * Returns:  The number of bytes written.
+ *           0 means no dirty pages
  */
 
 static int ram_save_block(QEMUFile *f, bool last_stage)
@@ -432,7 +431,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
     RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
     bool complete_round = false;
-    int bytes_sent = -1;
+    int bytes_sent = 0;
     MemoryRegion *mr;
     ram_addr_t current_addr;
 
@@ -460,6 +459,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
             p = memory_region_get_ram_ptr(mr) + offset;
 
+            /* In doubt sent page as normal */
+            bytes_sent = -1;
             if (is_dup_page(p)) {
                 acct_info.dup_pages++;
                 bytes_sent = save_block_hdr(f, block, offset, cont,
@@ -475,7 +476,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                 }
             }
 
-            /* either we didn't send yet (we may have had XBZRLE overflow) */
+            /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
@@ -484,7 +485,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             }
 
             /* if page is unmodified, continue to the next */
-            if (bytes_sent != 0) {
+            if (bytes_sent > 0) {
                 last_sent_block = block;
                 break;
             }
@@ -605,6 +606,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     int ret;
     int i;
     int64_t t0;
+    int total_sent = 0;
 
     qemu_mutex_lock_ramlist();
 
@@ -619,10 +621,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 
         bytes_sent = ram_save_block(f, false);
         /* no more blocks to sent */
-        if (bytes_sent < 0) {
+        if (bytes_sent == 0) {
             break;
         }
-        bytes_transferred += bytes_sent;
+        total_sent += bytes_sent;
         acct_info.iterations++;
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
@@ -641,13 +643,16 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     }
 
     if (ret < 0) {
+        bytes_transferred += total_sent;
         return ret;
     }
 
     qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    total_sent += 8;
+    bytes_transferred += total_sent;
 
-    return i;
+    return total_sent;
 }
 
 static int ram_save_complete(QEMUFile *f, void *opaque)
@@ -664,7 +669,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 
         bytes_sent = ram_save_block(f, true);
         /* no more blocks to sent */
-        if (bytes_sent < 0) {
+        if (bytes_sent == 0) {
             break;
         }
         bytes_transferred += bytes_sent;
commit 3f7d7b098194ec893efa037491f6231687ff043a
Author: Juan Quintela <quintela at redhat.com>
Date:   Thu Oct 18 13:56:35 2012 +0200

    ram: account the amount of transferred ram better
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 74dc9c8..4015bfd 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -265,16 +265,21 @@ uint64_t xbzrle_mig_pages_overflow(void)
     return acct_info.xbzrle_overflows;
 }
 
-static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-        int cont, int flag)
+static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                             int cont, int flag)
 {
-        qemu_put_be64(f, offset | cont | flag);
-        if (!cont) {
-                qemu_put_byte(f, strlen(block->idstr));
-                qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                strlen(block->idstr));
-        }
+    size_t size;
+
+    qemu_put_be64(f, offset | cont | flag);
+    size = 8;
 
+    if (!cont) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr,
+                        strlen(block->idstr));
+        size += 1 + strlen(block->idstr);
+    }
+    return size;
 }
 
 #define ENCODING_FLAG_XBZRLE 0x1
@@ -321,11 +326,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     }
 
     /* Send XBZRLE based compressed page */
-    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
     qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
     qemu_put_be16(f, encoded_len);
     qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_sent = encoded_len + 1 + 2;
+    bytes_sent += encoded_len + 1 + 2;
     acct_info.xbzrle_pages++;
     acct_info.xbzrle_bytes += bytes_sent;
 
@@ -457,9 +462,10 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
             if (is_dup_page(p)) {
                 acct_info.dup_pages++;
-                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
+                bytes_sent = save_block_hdr(f, block, offset, cont,
+                                            RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
-                bytes_sent = 1;
+                bytes_sent += 1;
             } else if (migrate_use_xbzrle()) {
                 current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
@@ -471,9 +477,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
             /* either we didn't send yet (we may have had XBZRLE overflow) */
             if (bytes_sent == -1) {
-                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
+                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-                bytes_sent = TARGET_PAGE_SIZE;
+                bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
 
commit 4c8ae0f60e63478aea0a1741cca95474b68fb949
Author: Juan Quintela <quintela at redhat.com>
Date:   Thu Oct 18 00:00:59 2012 +0200

    ram: optimize migration bitmap walking
    
    Instead of testing each page individually, we search what is the next
    dirty page with a bitmap operation.  We have to reorganize the code to
    move from a "for" loop, to a while(dirty) loop.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 9f32ee0..74dc9c8 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -343,18 +343,21 @@ static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;
 
-static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
-                                                         ram_addr_t offset)
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
 {
-    bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
 
-    ret = test_and_clear_bit(nr, migration_bitmap);
+    unsigned long next = find_next_bit(migration_bitmap, size, nr);
 
-    if (ret) {
+    if (next < size) {
+        clear_bit(next, migration_bitmap);
         migration_dirty_pages--;
     }
-    return ret;
+    return (next - base) << TARGET_PAGE_BITS;
 }
 
 static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
@@ -423,6 +426,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 {
     RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
+    bool complete_round = false;
     int bytes_sent = -1;
     MemoryRegion *mr;
     ram_addr_t current_addr;
@@ -430,9 +434,21 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
     if (!block)
         block = QTAILQ_FIRST(&ram_list.blocks);
 
-    do {
+    while (true) {
         mr = block->mr;
-        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->length) {
+            offset = 0;
+            block = QTAILQ_NEXT(block, next);
+            if (!block) {
+                block = QTAILQ_FIRST(&ram_list.blocks);
+                complete_round = true;
+            }
+        } else {
             uint8_t *p;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
@@ -467,16 +483,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                 break;
             }
         }
-
-        offset += TARGET_PAGE_SIZE;
-        if (offset >= block->length) {
-            offset = 0;
-            block = QTAILQ_NEXT(block, next);
-            if (!block)
-                block = QTAILQ_FIRST(&ram_list.blocks);
-        }
-    } while (block != last_seen_block || offset != last_offset);
-
+    }
     last_seen_block = block;
     last_offset = offset;
 
commit ece7931817e03a4d946c15716fab5e4f781663c9
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 17 20:27:15 2012 +0200

    ram: Use memory_region_test_and_clear_dirty
    
    This avoids having to do two walks over the dirty bitmap, once reading
    the dirty bits, and another cleaning them.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index caac526..9f32ee0 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -390,13 +390,12 @@ static void migration_bitmap_sync(void)
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
-                                        DIRTY_MEMORY_MIGRATION)) {
+            if (memory_region_test_and_clear_dirty(block->mr,
+                                                   addr, TARGET_PAGE_SIZE,
+                                                   DIRTY_MEMORY_MIGRATION)) {
                 migration_bitmap_set_dirty(block->mr, addr);
             }
         }
-        memory_region_reset_dirty(block->mr, 0, block->length,
-                                  DIRTY_MEMORY_MIGRATION);
     }
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
commit 6c279db8ee99e64e498447c67c16e987150be96b
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 17 20:24:28 2012 +0200

    memory: introduce memory_region_test_and_clear_dirty
    
    This function avoids having to do two calls, one to test the dirty bit, and
    other to reset it.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/exec/memory.h b/include/exec/memory.h
index aada969..2322732 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -454,6 +454,22 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size);
 
 /**
+ * memory_region_test_and_clear_dirty: Check whether a range of bytes is dirty
+ *                                     for a specified client. It clears them.
+ *
+ * Checks whether a range of bytes has been written to since the last
+ * call to memory_region_reset_dirty() with the same @client.  Dirty logging
+ * must be enabled.
+ *
+ * @mr: the memory region being queried.
+ * @addr: the address (relative to the start of the region) being queried.
+ * @size: the size of the range being queried.
+ * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or
+ *          %DIRTY_MEMORY_VGA.
+ */
+bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
+                                        hwaddr size, unsigned client);
+/**
  * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with
  *                                  any external TLBs (e.g. kvm)
  *
diff --git a/memory.c b/memory.c
index 35e6122..410c5f8 100644
--- a/memory.c
+++ b/memory.c
@@ -1081,6 +1081,22 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
     return cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, -1);
 }
 
+bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
+                                        hwaddr size, unsigned client)
+{
+    bool ret;
+    assert(mr->terminates);
+    ret = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size,
+                                        1 << client);
+    if (ret) {
+        cpu_physical_memory_reset_dirty(mr->ram_addr + addr,
+                                        mr->ram_addr + addr + size,
+                                        1 << client);
+    }
+    return ret;
+}
+
+
 void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 {
     AddressSpace *as;
commit 5f718a15d0db3775bbcf2755a35dd6b019bcff8b
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 17 20:10:55 2012 +0200

    ram: Add last_sent_block
    
    This is the last block from where we have sent data.
    
    Signed-off-by: Orit Wasserman <owasserm at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 4351d30..caac526 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -336,6 +336,8 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
 /* This is the last block that we have visited serching for dirty pages
  */
 static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
@@ -433,7 +435,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         mr = block->mr;
         if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
             uint8_t *p;
-            int cont = (block == last_seen_block) ?
+            int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
             p = memory_region_get_ram_ptr(mr) + offset;
@@ -462,6 +464,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
             /* if page is unmodified, continue to the next */
             if (bytes_sent != 0) {
+                last_sent_block = block;
                 break;
             }
         }
@@ -535,6 +538,7 @@ static void ram_migration_cancel(void *opaque)
 static void reset_ram_globals(void)
 {
     last_seen_block = NULL;
+    last_sent_block = NULL;
     last_offset = 0;
     last_version = ram_list.version;
 }
commit b23a9a5cad356cdc8e25d4be72e53096a27ea722
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 17 20:08:04 2012 +0200

    ram: rename last_block to last_seen_block
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index af1ae9f..4351d30 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -332,7 +332,10 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     return bytes_sent;
 }
 
-static RAMBlock *last_block;
+
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
 static ram_addr_t last_offset;
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
@@ -417,7 +420,7 @@ static void migration_bitmap_sync(void)
 
 static int ram_save_block(QEMUFile *f, bool last_stage)
 {
-    RAMBlock *block = last_block;
+    RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
     int bytes_sent = -1;
     MemoryRegion *mr;
@@ -430,7 +433,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         mr = block->mr;
         if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
             uint8_t *p;
-            int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
+            int cont = (block == last_seen_block) ?
+                RAM_SAVE_FLAG_CONTINUE : 0;
 
             p = memory_region_get_ram_ptr(mr) + offset;
 
@@ -469,9 +473,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             if (!block)
                 block = QTAILQ_FIRST(&ram_list.blocks);
         }
-    } while (block != last_block || offset != last_offset);
+    } while (block != last_seen_block || offset != last_offset);
 
-    last_block = block;
+    last_seen_block = block;
     last_offset = offset;
 
     return bytes_sent;
@@ -530,7 +534,7 @@ static void ram_migration_cancel(void *opaque)
 
 static void reset_ram_globals(void)
 {
-    last_block = NULL;
+    last_seen_block = NULL;
     last_offset = 0;
     last_version = ram_list.version;
 }
commit 0d3b26f5488e04c01667dd12c9bd7eed54dda258
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 20:04:41 2012 +0200

    migration: move migration notifier
    
    At this point, it is guaranteed that state is ACTIVE.  The old position
    didn't ensure that.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration.c b/migration.c
index 4a9f0b5..596aca7 100644
--- a/migration.c
+++ b/migration.c
@@ -455,8 +455,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
         error_propagate(errp, local_err);
         return;
     }
-
-    notifier_list_notify(&migration_state_notifiers, s);
 }
 
 void qmp_migrate_cancel(Error **errp)
@@ -814,4 +812,5 @@ void migrate_fd_connect(MigrationState *migration_state)
 
     qemu_thread_create(&s->thread, buffered_file_thread, s,
                        QEMU_THREAD_DETACHED);
+    notifier_list_notify(&migration_state_notifiers, s);
 }
commit 2e450865338738300e529457879d81332939f064
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 18:23:03 2012 +0200

    migration: Inline qemu_fopen_ops_buffered into migrate_fd_connect
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2998dcc..0c9bf8b 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -127,6 +127,4 @@ int migrate_use_xbzrle(void);
 int64_t migrate_xbzrle_cache_size(void);
 
 int64_t xbzrle_cache_resize(int64_t new_size);
-
-void qemu_fopen_ops_buffered(MigrationState *migration_state);
 #endif
diff --git a/migration.c b/migration.c
index a387201..4a9f0b5 100644
--- a/migration.c
+++ b/migration.c
@@ -371,13 +371,6 @@ bool migration_has_failed(MigrationState *s)
             s->state == MIG_STATE_ERROR);
 }
 
-void migrate_fd_connect(MigrationState *s)
-{
-    s->state = MIG_STATE_ACTIVE;
-    s->first_time = true;
-    qemu_fopen_ops_buffered(s);
-}
-
 static MigrationState *migrate_init(const MigrationParams *params)
 {
     MigrationState *s = migrate_get_current();
@@ -803,10 +796,12 @@ static const QEMUFileOps buffered_file_ops = {
     .set_rate_limit = buffered_set_rate_limit,
 };
 
-void qemu_fopen_ops_buffered(MigrationState *migration_state)
+void migrate_fd_connect(MigrationState *migration_state)
 {
     QEMUFileBuffered *s;
 
+    migration_state->state = MIG_STATE_ACTIVE;
+    migration_state->first_time = true;
     s = g_malloc0(sizeof(*s));
 
     s->migration_state = migration_state;
commit 0e288fa369c02df1731dc59ffbf158f5e5f2d80f
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 17:23:59 2012 +0200

    migration: move migration_fd_put_ready()
    
    Put it near its use and un-export it.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/migration/migration.h b/include/migration/migration.h
index af7de03..2998dcc 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -81,7 +81,6 @@ void migrate_fd_connect(MigrationState *s);
 
 ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
                               size_t size);
-bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size);
 int migrate_fd_close(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
diff --git a/migration.c b/migration.c
index 15f22ea..a387201 100644
--- a/migration.c
+++ b/migration.c
@@ -321,71 +321,6 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
     return ret;
 }
 
-bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size)
-{
-    int ret;
-    uint64_t pending_size;
-    bool last_round = false;
-
-    qemu_mutex_lock_iothread();
-    if (s->state != MIG_STATE_ACTIVE) {
-        DPRINTF("put_ready returning because of non-active state\n");
-        qemu_mutex_unlock_iothread();
-        return false;
-    }
-    if (s->first_time) {
-        s->first_time = false;
-        DPRINTF("beginning savevm\n");
-        ret = qemu_savevm_state_begin(s->file, &s->params);
-        if (ret < 0) {
-            DPRINTF("failed, %d\n", ret);
-            migrate_fd_error(s);
-            qemu_mutex_unlock_iothread();
-            return false;
-        }
-    }
-
-    DPRINTF("iterate\n");
-    pending_size = qemu_savevm_state_pending(s->file, max_size);
-    DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
-    if (pending_size >= max_size) {
-        ret = qemu_savevm_state_iterate(s->file);
-        if (ret < 0) {
-            migrate_fd_error(s);
-        }
-    } else {
-        int old_vm_running = runstate_is_running();
-        int64_t start_time, end_time;
-
-        DPRINTF("done iterating\n");
-        start_time = qemu_get_clock_ms(rt_clock);
-        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-        if (old_vm_running) {
-            vm_stop(RUN_STATE_FINISH_MIGRATE);
-        } else {
-            vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-        }
-
-        if (qemu_savevm_state_complete(s->file) < 0) {
-            migrate_fd_error(s);
-        } else {
-            migrate_fd_completed(s);
-        }
-        end_time = qemu_get_clock_ms(rt_clock);
-        s->total_time = end_time - s->total_time;
-        s->downtime = end_time - start_time;
-        if (s->state != MIG_STATE_COMPLETED) {
-            if (old_vm_running) {
-                vm_start();
-            }
-        }
-        last_round = true;
-    }
-    qemu_mutex_unlock_iothread();
-
-    return last_round;
-}
-
 static void migrate_fd_cancel(MigrationState *s)
 {
     if (s->state != MIG_STATE_ACTIVE)
@@ -748,6 +683,71 @@ static int64_t buffered_get_rate_limit(void *opaque)
     return s->xfer_limit;
 }
 
+static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size)
+{
+    int ret;
+    uint64_t pending_size;
+    bool last_round = false;
+
+    qemu_mutex_lock_iothread();
+    if (s->state != MIG_STATE_ACTIVE) {
+        DPRINTF("put_ready returning because of non-active state\n");
+        qemu_mutex_unlock_iothread();
+        return false;
+    }
+    if (s->first_time) {
+        s->first_time = false;
+        DPRINTF("beginning savevm\n");
+        ret = qemu_savevm_state_begin(s->file, &s->params);
+        if (ret < 0) {
+            DPRINTF("failed, %d\n", ret);
+            migrate_fd_error(s);
+            qemu_mutex_unlock_iothread();
+            return false;
+        }
+    }
+
+    DPRINTF("iterate\n");
+    pending_size = qemu_savevm_state_pending(s->file, max_size);
+    DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
+    if (pending_size >= max_size) {
+        ret = qemu_savevm_state_iterate(s->file);
+        if (ret < 0) {
+            migrate_fd_error(s);
+        }
+    } else {
+        int old_vm_running = runstate_is_running();
+        int64_t start_time, end_time;
+
+        DPRINTF("done iterating\n");
+        start_time = qemu_get_clock_ms(rt_clock);
+        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+        if (old_vm_running) {
+            vm_stop(RUN_STATE_FINISH_MIGRATE);
+        } else {
+            vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+        }
+
+        if (qemu_savevm_state_complete(s->file) < 0) {
+            migrate_fd_error(s);
+        } else {
+            migrate_fd_completed(s);
+        }
+        end_time = qemu_get_clock_ms(rt_clock);
+        s->total_time = end_time - s->total_time;
+        s->downtime = end_time - start_time;
+        if (s->state != MIG_STATE_COMPLETED) {
+            if (old_vm_running) {
+                vm_start();
+            }
+        }
+        last_round = true;
+    }
+    qemu_mutex_unlock_iothread();
+
+    return last_round;
+}
+
 static void *buffered_file_thread(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
commit 5b4e1eb769eee892b44d3f6b2369b05196442f59
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Dec 19 10:40:48 2012 +0100

    migration: add XFER_LIMIT_RATIO
    
    The "magic" divisions by 10 are there because of the value of BUFFER_DELAY.
    Introduce a constant to explain them better.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration.c b/migration.c
index 2937c87..15f22ea 100644
--- a/migration.c
+++ b/migration.c
@@ -44,6 +44,11 @@ enum {
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
+/* Amount of time to allocate to each "chunk" of bandwidth-throttled
+ * data. */
+#define BUFFER_DELAY     100
+#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
+
 /* Migration XBZRLE default cache size */
 #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
 
@@ -743,9 +748,6 @@ static int64_t buffered_get_rate_limit(void *opaque)
     return s->xfer_limit;
 }
 
-/* 100ms  xfer_limit is the limit that we should write each 100ms */
-#define BUFFER_DELAY 100
-
 static void *buffered_file_thread(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
@@ -808,7 +810,7 @@ void qemu_fopen_ops_buffered(MigrationState *migration_state)
     s = g_malloc0(sizeof(*s));
 
     s->migration_state = migration_state;
-    s->xfer_limit = migration_state->bandwidth_limit / 10;
+    s->xfer_limit = s->migration_state->bandwidth_limit / XFER_LIMIT_RATIO;
     s->migration_state->complete = false;
 
     s->file = qemu_fopen_ops(s, &buffered_file_ops);
commit 0d82d0e8b98cf0ea03a45f8542d835ebd3a84cd3
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 14:18:33 2012 +0200

    migration: move buffered_file.c code into migration.c
    
    This only moves the code (also from buffered_file.h to migration.h).
    Fix whitespace until checkpatch is happy.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/Makefile.objs b/Makefile.objs
index a637a4a..4ef0a71 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -77,7 +77,8 @@ common-obj-$(CONFIG_LINUX) += fsdev/
 extra-obj-$(CONFIG_LINUX) += fsdev/
 
 common-obj-y += tcg-runtime.o host-utils.o main-loop.o
-common-obj-y += buffered_file.o migration.o migration-tcp.o
+common-obj-y += migration.o migration-tcp.o
+common-obj-y += migration.o migration-tcp.o
 common-obj-y += qemu-char.o #aio.o
 common-obj-y += block-migration.o iohandler.o
 common-obj-y += bitmap.o bitops.o
diff --git a/buffered_file.c b/buffered_file.c
deleted file mode 100644
index fdf7efa..0000000
--- a/buffered_file.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * QEMU buffered QEMUFile
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori at us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu-common.h"
-#include "hw/hw.h"
-#include "qemu/timer.h"
-#include "buffered_file.h"
-#include "qemu/thread.h"
-
-//#define DEBUG_BUFFERED_FILE
-
-typedef struct QEMUFileBuffered
-{
-    MigrationState *migration_state;
-    QEMUFile *file;
-    size_t bytes_xfer;
-    size_t xfer_limit;
-    uint8_t *buffer;
-    size_t buffer_size;
-    size_t buffer_capacity;
-    QemuThread thread;
-} QEMUFileBuffered;
-
-#ifdef DEBUG_BUFFERED_FILE
-#define DPRINTF(fmt, ...) \
-    do { printf("buffered-file: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
-static ssize_t buffered_flush(QEMUFileBuffered *s)
-{
-    size_t offset = 0;
-    ssize_t ret = 0;
-
-    DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
-
-    while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
-        size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
-        ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
-                                    to_send);
-        if (ret <= 0) {
-            DPRINTF("error flushing data, %zd\n", ret);
-            break;
-        } else {
-            DPRINTF("flushed %zd byte(s)\n", ret);
-            offset += ret;
-            s->bytes_xfer += ret;
-        }
-    }
-
-    DPRINTF("flushed %zu of %zu byte(s)\n", offset, s->buffer_size);
-    memmove(s->buffer, s->buffer + offset, s->buffer_size - offset);
-    s->buffer_size -= offset;
-
-    if (ret < 0) {
-        return ret;
-    }
-    return offset;
-}
-
-static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
-{
-    QEMUFileBuffered *s = opaque;
-    ssize_t error;
-
-    DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
-
-    error = qemu_file_get_error(s->file);
-    if (error) {
-        DPRINTF("flush when error, bailing: %s\n", strerror(-error));
-        return error;
-    }
-
-    if (size <= 0) {
-        return size;
-    }
-
-    if (size > (s->buffer_capacity - s->buffer_size)) {
-        DPRINTF("increasing buffer capacity from %zu by %zu\n",
-                s->buffer_capacity, size + 1024);
-
-        s->buffer_capacity += size + 1024;
-
-        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
-    }
-
-    memcpy(s->buffer + s->buffer_size, buf, size);
-    s->buffer_size += size;
-
-    return size;
-}
-
-static int buffered_close(void *opaque)
-{
-    QEMUFileBuffered *s = opaque;
-    ssize_t ret = 0;
-    int ret2;
-
-    DPRINTF("closing\n");
-
-    s->xfer_limit = INT_MAX;
-    while (!qemu_file_get_error(s->file) && s->buffer_size) {
-        ret = buffered_flush(s);
-        if (ret < 0) {
-            break;
-        }
-    }
-
-    ret2 = migrate_fd_close(s->migration_state);
-    if (ret >= 0) {
-        ret = ret2;
-    }
-    ret = migrate_fd_close(s->migration_state);
-    s->migration_state->complete = true;
-    return ret;
-}
-
-/*
- * The meaning of the return values is:
- *   0: We can continue sending
- *   1: Time to stop
- *   negative: There has been an error
- */
-static int buffered_get_fd(void *opaque)
-{
-    QEMUFileBuffered *s = opaque;
-
-    return qemu_get_fd(s->file);
-}
-
-static int buffered_rate_limit(void *opaque)
-{
-    QEMUFileBuffered *s = opaque;
-    int ret;
-
-    ret = qemu_file_get_error(s->file);
-    if (ret) {
-        return ret;
-    }
-
-    if (s->bytes_xfer > s->xfer_limit)
-        return 1;
-
-    return 0;
-}
-
-static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
-{
-    QEMUFileBuffered *s = opaque;
-    if (qemu_file_get_error(s->file)) {
-        goto out;
-    }
-    if (new_rate > SIZE_MAX) {
-        new_rate = SIZE_MAX;
-    }
-
-    s->xfer_limit = new_rate / 10;
-    
-out:
-    return s->xfer_limit;
-}
-
-static int64_t buffered_get_rate_limit(void *opaque)
-{
-    QEMUFileBuffered *s = opaque;
-  
-    return s->xfer_limit;
-}
-
-/* 100ms  xfer_limit is the limit that we should write each 100ms */
-#define BUFFER_DELAY 100
-
-static void *buffered_file_thread(void *opaque)
-{
-    QEMUFileBuffered *s = opaque;
-    int64_t initial_time = qemu_get_clock_ms(rt_clock);
-    int64_t max_size = 0;
-    bool last_round = false;
-
-    while (true) {
-        int64_t current_time = qemu_get_clock_ms(rt_clock);
-
-        if (s->migration_state->complete) {
-            break;
-        }
-        if (current_time >= initial_time + BUFFER_DELAY) {
-            uint64_t transferred_bytes = s->bytes_xfer;
-            uint64_t time_spent = current_time - initial_time;
-            double bandwidth = transferred_bytes / time_spent;
-            max_size = bandwidth * migrate_max_downtime() / 1000000;
-
-            DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
-                    " bandwidth %g max_size %" PRId64 "\n",
-                    transferred_bytes, time_spent, bandwidth, max_size);
-
-            s->bytes_xfer = 0;
-            initial_time = current_time;
-        }
-        if (!last_round && (s->bytes_xfer >= s->xfer_limit)) {
-            /* usleep expects microseconds */
-            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
-        }
-        if (buffered_flush(s) < 0) {
-            break;
-        }
-
-        DPRINTF("file is ready\n");
-        if (s->bytes_xfer < s->xfer_limit) {
-            DPRINTF("notifying client\n");
-            last_round = migrate_fd_put_ready(s->migration_state, max_size);
-        }
-    }
-
-    g_free(s->buffer);
-    g_free(s);
-    return NULL;
-}
-
-static const QEMUFileOps buffered_file_ops = {
-    .get_fd =         buffered_get_fd,
-    .put_buffer =     buffered_put_buffer,
-    .close =          buffered_close,
-    .rate_limit =     buffered_rate_limit,
-    .get_rate_limit = buffered_get_rate_limit,
-    .set_rate_limit = buffered_set_rate_limit,
-};
-
-void qemu_fopen_ops_buffered(MigrationState *migration_state)
-{
-    QEMUFileBuffered *s;
-
-    s = g_malloc0(sizeof(*s));
-
-    s->migration_state = migration_state;
-    s->xfer_limit = migration_state->bandwidth_limit / 10;
-    s->migration_state->complete = false;
-
-    s->file = qemu_fopen_ops(s, &buffered_file_ops);
-
-    migration_state->file = s->file;
-
-    qemu_thread_create(&s->thread, buffered_file_thread, s,
-                       QEMU_THREAD_DETACHED);
-}
diff --git a/buffered_file.h b/buffered_file.h
deleted file mode 100644
index d278f3d..0000000
--- a/buffered_file.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * QEMU buffered QEMUFile
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori at us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-#ifndef QEMU_BUFFERED_FILE_H
-#define QEMU_BUFFERED_FILE_H
-
-#include "hw/hw.h"
-#include "migration/migration.h"
-
-void qemu_fopen_ops_buffered(MigrationState *migration_state);
-
-#endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9571ec5..af7de03 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -129,4 +129,5 @@ int64_t migrate_xbzrle_cache_size(void);
 
 int64_t xbzrle_cache_resize(int64_t new_size);
 
+void qemu_fopen_ops_buffered(MigrationState *migration_state);
 #endif
diff --git a/migration.c b/migration.c
index b6374ae..2937c87 100644
--- a/migration.c
+++ b/migration.c
@@ -16,7 +16,7 @@
 #include "qemu-common.h"
 #include "migration/migration.h"
 #include "monitor/monitor.h"
-#include "buffered_file.h"
+#include "migration/qemu-file.h"
 #include "sysemu/sysemu.h"
 #include "block/block.h"
 #include "qemu/sockets.h"
@@ -587,3 +587,234 @@ int64_t migrate_xbzrle_cache_size(void)
 
     return s->xbzrle_cache_size;
 }
+
+/* migration thread support */
+
+typedef struct QEMUFileBuffered {
+    MigrationState *migration_state;
+    QEMUFile *file;
+    size_t bytes_xfer;
+    size_t xfer_limit;
+    uint8_t *buffer;
+    size_t buffer_size;
+    size_t buffer_capacity;
+    QemuThread thread;
+} QEMUFileBuffered;
+
+static ssize_t buffered_flush(QEMUFileBuffered *s)
+{
+    size_t offset = 0;
+    ssize_t ret = 0;
+
+    DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
+
+    while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
+        size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
+        ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
+                                    to_send);
+        if (ret <= 0) {
+            DPRINTF("error flushing data, %zd\n", ret);
+            break;
+        } else {
+            DPRINTF("flushed %zd byte(s)\n", ret);
+            offset += ret;
+            s->bytes_xfer += ret;
+        }
+    }
+
+    DPRINTF("flushed %zu of %zu byte(s)\n", offset, s->buffer_size);
+    memmove(s->buffer, s->buffer + offset, s->buffer_size - offset);
+    s->buffer_size -= offset;
+
+    if (ret < 0) {
+        return ret;
+    }
+    return offset;
+}
+
+static int buffered_put_buffer(void *opaque, const uint8_t *buf,
+                               int64_t pos, int size)
+{
+    QEMUFileBuffered *s = opaque;
+    ssize_t error;
+
+    DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
+
+    error = qemu_file_get_error(s->file);
+    if (error) {
+        DPRINTF("flush when error, bailing: %s\n", strerror(-error));
+        return error;
+    }
+
+    if (size <= 0) {
+        return size;
+    }
+
+    if (size > (s->buffer_capacity - s->buffer_size)) {
+        DPRINTF("increasing buffer capacity from %zu by %zu\n",
+                s->buffer_capacity, size + 1024);
+
+        s->buffer_capacity += size + 1024;
+
+        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
+    }
+
+    memcpy(s->buffer + s->buffer_size, buf, size);
+    s->buffer_size += size;
+
+    return size;
+}
+
+static int buffered_close(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+    ssize_t ret = 0;
+    int ret2;
+
+    DPRINTF("closing\n");
+
+    s->xfer_limit = INT_MAX;
+    while (!qemu_file_get_error(s->file) && s->buffer_size) {
+        ret = buffered_flush(s);
+        if (ret < 0) {
+            break;
+        }
+    }
+
+    ret2 = migrate_fd_close(s->migration_state);
+    if (ret >= 0) {
+        ret = ret2;
+    }
+    ret = migrate_fd_close(s->migration_state);
+    s->migration_state->complete = true;
+    return ret;
+}
+
+static int buffered_get_fd(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+
+    return qemu_get_fd(s->file);
+}
+
+/*
+ * The meaning of the return values is:
+ *   0: We can continue sending
+ *   1: Time to stop
+ *   negative: There has been an error
+ */
+static int buffered_rate_limit(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+    int ret;
+
+    ret = qemu_file_get_error(s->file);
+    if (ret) {
+        return ret;
+    }
+
+    if (s->bytes_xfer > s->xfer_limit) {
+        return 1;
+    }
+
+    return 0;
+}
+
+static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate)
+{
+    QEMUFileBuffered *s = opaque;
+    if (qemu_file_get_error(s->file)) {
+        goto out;
+    }
+    if (new_rate > SIZE_MAX) {
+        new_rate = SIZE_MAX;
+    }
+
+    s->xfer_limit = new_rate / 10;
+
+out:
+    return s->xfer_limit;
+}
+
+static int64_t buffered_get_rate_limit(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+
+    return s->xfer_limit;
+}
+
+/* 100ms  xfer_limit is the limit that we should write each 100ms */
+#define BUFFER_DELAY 100
+
+static void *buffered_file_thread(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+    int64_t initial_time = qemu_get_clock_ms(rt_clock);
+    int64_t max_size = 0;
+    bool last_round = false;
+
+    while (true) {
+        int64_t current_time = qemu_get_clock_ms(rt_clock);
+
+        if (s->migration_state->complete) {
+            break;
+        }
+        if (current_time >= initial_time + BUFFER_DELAY) {
+            uint64_t transferred_bytes = s->bytes_xfer;
+            uint64_t time_spent = current_time - initial_time;
+            double bandwidth = transferred_bytes / time_spent;
+            max_size = bandwidth * migrate_max_downtime() / 1000000;
+
+            DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
+                    " bandwidth %g max_size %" PRId64 "\n",
+                    transferred_bytes, time_spent, bandwidth, max_size);
+
+            s->bytes_xfer = 0;
+            initial_time = current_time;
+        }
+        if (!last_round && (s->bytes_xfer >= s->xfer_limit)) {
+            /* usleep expects microseconds */
+            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
+        }
+        if (buffered_flush(s) < 0) {
+            break;
+        }
+
+        DPRINTF("file is ready\n");
+        if (s->bytes_xfer < s->xfer_limit) {
+            DPRINTF("notifying client\n");
+            last_round = migrate_fd_put_ready(s->migration_state, max_size);
+        }
+    }
+
+    g_free(s->buffer);
+    g_free(s);
+    return NULL;
+}
+
+static const QEMUFileOps buffered_file_ops = {
+    .get_fd =         buffered_get_fd,
+    .put_buffer =     buffered_put_buffer,
+    .close =          buffered_close,
+    .rate_limit =     buffered_rate_limit,
+    .get_rate_limit = buffered_get_rate_limit,
+    .set_rate_limit = buffered_set_rate_limit,
+};
+
+void qemu_fopen_ops_buffered(MigrationState *migration_state)
+{
+    QEMUFileBuffered *s;
+
+    s = g_malloc0(sizeof(*s));
+
+    s->migration_state = migration_state;
+    s->xfer_limit = migration_state->bandwidth_limit / 10;
+    s->migration_state->complete = false;
+
+    s->file = qemu_fopen_ops(s, &buffered_file_ops);
+
+    migration_state->file = s->file;
+
+    qemu_thread_create(&s->thread, buffered_file_thread, s,
+                       QEMU_THREAD_DETACHED);
+}
commit e4ed1541ac9413eac494a03532e34beaf8a7d1c5
Author: Juan Quintela <quintela at redhat.com>
Date:   Fri Sep 21 11:18:18 2012 +0200

    savevm: New save live migration method: pending
    
    Code just now does (simplified for clarity)
    
        if (qemu_savevm_state_iterate(s->file) == 1) {
           vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
           qemu_savevm_state_complete(s->file);
        }
    
    The problem here is that qemu_savevm_state_iterate() returns 1 when it
    knows that the remaining memory to send takes less than max downtime.
    
    But this means that we could end up spending 2x max_downtime, one
    downtime in qemu_savevm_state_iterate, and the other in
    qemu_savevm_state_complete.
    
    Changed code to:
    
        pending_size = qemu_savevm_state_pending(s->file, max_size);
        DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
        if (pending_size >= max_size) {
            ret = qemu_savevm_state_iterate(s->file);
         } else {
            vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            qemu_savevm_state_complete(s->file);
         }
    
    So what we do is: at current network speed, we calculate the maximum
    number of bytes we can send: max_size.
    
    Then we ask every save_live section how much they have pending.  If
    they are less than max_size, we move to complete phase, otherwise we
    do an iterate one.
    
    This makes things much simpler, because now individual sections don't
    have to calculate the bandwidth (it was impossible to do right from
    there).
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index fad1c40..af1ae9f 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -582,12 +582,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
 static int ram_save_iterate(QEMUFile *f, void *opaque)
 {
-    uint64_t bytes_transferred_last;
-    double bwidth = 0;
     int ret;
     int i;
-    uint64_t expected_downtime;
-    MigrationState *s = migrate_get_current();
+    int64_t t0;
 
     qemu_mutex_lock_ramlist();
 
@@ -595,9 +592,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         reset_ram_globals();
     }
 
-    bytes_transferred_last = bytes_transferred;
-    bwidth = qemu_get_clock_ns(rt_clock);
-
+    t0 = qemu_get_clock_ns(rt_clock);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
@@ -615,7 +610,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
            iterations
         */
         if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000;
+            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
             if (t1 > MAX_WAIT) {
                 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                         t1, i);
@@ -629,31 +624,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         return ret;
     }
 
-    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
-    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
-
-    /* if we haven't transferred anything this round, force
-     * expected_downtime to a very high value, but without
-     * crashing */
-    if (bwidth == 0) {
-        bwidth = 0.000001;
-    }
-
     qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
-    expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-    DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(" PRIu64 ")?\n",
-            expected_downtime, migrate_max_downtime());
-
-    if (expected_downtime <= migrate_max_downtime()) {
-        migration_bitmap_sync();
-        expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-        s->expected_downtime = expected_downtime / 1000000; /* ns -> ms */
-
-        return expected_downtime <= migrate_max_downtime();
-    }
-    return 0;
+    return i;
 }
 
 static int ram_save_complete(QEMUFile *f, void *opaque)
@@ -683,6 +657,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }
 
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    uint64_t remaining_size;
+
+    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+
+    if (remaining_size < max_size) {
+        migration_bitmap_sync();
+        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+    }
+    return remaining_size;
+}
+
 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
 {
     int ret, rc = 0;
@@ -869,6 +856,7 @@ SaveVMHandlers savevm_ram_handlers = {
     .save_live_setup = ram_save_setup,
     .save_live_iterate = ram_save_iterate,
     .save_live_complete = ram_save_complete,
+    .save_live_pending = ram_save_pending,
     .load_state = ram_load,
     .cancel = ram_migration_cancel,
 };
diff --git a/block-migration.c b/block-migration.c
index ca4ba3f..6acf3e1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -77,9 +77,7 @@ typedef struct BlkMigState {
     int64_t total_sector_sum;
     int prev_progress;
     int bulk_completed;
-    long double total_time;
     long double prev_time_offset;
-    int reads;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -132,12 +130,6 @@ uint64_t blk_mig_bytes_total(void)
     return sum << BDRV_SECTOR_BITS;
 }
 
-static inline long double compute_read_bwidth(void)
-{
-    assert(block_mig_state.total_time != 0);
-    return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE;
-}
-
 static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
 {
     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -191,8 +183,6 @@ static void blk_mig_read_cb(void *opaque, int ret)
 
     blk->ret = ret;
 
-    block_mig_state.reads++;
-    block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset);
     block_mig_state.prev_time_offset = curr_time;
 
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
@@ -310,8 +300,6 @@ static void init_blk_migration(QEMUFile *f)
     block_mig_state.total_sector_sum = 0;
     block_mig_state.prev_progress = -1;
     block_mig_state.bulk_completed = 0;
-    block_mig_state.total_time = 0;
-    block_mig_state.reads = 0;
 
     bdrv_iterate(init_blk_migration_it, NULL);
 }
@@ -493,32 +481,6 @@ static int64_t get_remaining_dirty(void)
     return dirty * BLOCK_SIZE;
 }
 
-static int is_stage2_completed(void)
-{
-    int64_t remaining_dirty;
-    long double bwidth;
-
-    if (block_mig_state.bulk_completed == 1) {
-
-        remaining_dirty = get_remaining_dirty();
-        if (remaining_dirty == 0) {
-            return 1;
-        }
-
-        bwidth = compute_read_bwidth();
-
-        if ((remaining_dirty / bwidth) <=
-            migrate_max_downtime()) {
-            /* finish stage2 because we think that we can finish remaining work
-               below max_downtime */
-
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
 static void blk_mig_cleanup(void)
 {
     BlkMigDevState *bmds;
@@ -619,7 +581,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque)
 
     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
 
-    return is_stage2_completed();
+    return 0;
 }
 
 static int block_save_complete(QEMUFile *f, void *opaque)
@@ -659,6 +621,14 @@ static int block_save_complete(QEMUFile *f, void *opaque)
     return 0;
 }
 
+static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+
+    DPRINTF("Enter save live pending  %ld\n", get_remaining_dirty());
+
+    return get_remaining_dirty();
+}
+
 static int block_load(QEMUFile *f, void *opaque, int version_id)
 {
     static int banner_printed;
@@ -755,6 +725,7 @@ SaveVMHandlers savevm_block_handlers = {
     .save_live_setup = block_save_setup,
     .save_live_iterate = block_save_iterate,
     .save_live_complete = block_save_complete,
+    .save_live_pending = block_save_pending,
     .load_state = block_load,
     .cancel = block_migration_cancel,
     .is_active = block_is_active,
diff --git a/buffered_file.c b/buffered_file.c
index be9424b..fdf7efa 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -181,13 +181,15 @@ static int64_t buffered_get_rate_limit(void *opaque)
     return s->xfer_limit;
 }
 
-/* 10ms  xfer_limit is the limit that we should write each 10ms */
+/* 100ms  xfer_limit is the limit that we should write each 100ms */
 #define BUFFER_DELAY 100
 
 static void *buffered_file_thread(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
-    int64_t expire_time = qemu_get_clock_ms(rt_clock) + BUFFER_DELAY;
+    int64_t initial_time = qemu_get_clock_ms(rt_clock);
+    int64_t max_size = 0;
+    bool last_round = false;
 
     while (true) {
         int64_t current_time = qemu_get_clock_ms(rt_clock);
@@ -195,13 +197,22 @@ static void *buffered_file_thread(void *opaque)
         if (s->migration_state->complete) {
             break;
         }
-        if (current_time >= expire_time) {
+        if (current_time >= initial_time + BUFFER_DELAY) {
+            uint64_t transferred_bytes = s->bytes_xfer;
+            uint64_t time_spent = current_time - initial_time;
+            double bandwidth = transferred_bytes / time_spent;
+            max_size = bandwidth * migrate_max_downtime() / 1000000;
+
+            DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64
+                    " bandwidth %g max_size %" PRId64 "\n",
+                    transferred_bytes, time_spent, bandwidth, max_size);
+
             s->bytes_xfer = 0;
-            expire_time = current_time + BUFFER_DELAY;
+            initial_time = current_time;
         }
-        if (s->bytes_xfer >= s->xfer_limit) {
+        if (!last_round && (s->bytes_xfer >= s->xfer_limit)) {
             /* usleep expects microseconds */
-            g_usleep((expire_time - current_time)*1000);
+            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
         }
         if (buffered_flush(s) < 0) {
             break;
@@ -210,7 +221,7 @@ static void *buffered_file_thread(void *opaque)
         DPRINTF("file is ready\n");
         if (s->bytes_xfer < s->xfer_limit) {
             DPRINTF("notifying client\n");
-            migrate_fd_put_ready(s->migration_state);
+            last_round = migrate_fd_put_ready(s->migration_state, max_size);
         }
     }
 
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 92190f2..9571ec5 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -81,7 +81,7 @@ void migrate_fd_connect(MigrationState *s);
 
 ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
                               size_t size);
-void migrate_fd_put_ready(MigrationState *s);
+bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size);
 int migrate_fd_close(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 623af0a..f27276c 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -35,6 +35,7 @@ typedef struct SaveVMHandlers {
     int (*save_live_setup)(QEMUFile *f, void *opaque);
     int (*save_live_iterate)(QEMUFile *f, void *opaque);
     int (*save_live_complete)(QEMUFile *f, void *opaque);
+    uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
     void (*cancel)(void *opaque);
     LoadStateHandler *load_state;
     bool (*is_active)(void *opaque);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 8eaa470..28a783e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -78,6 +78,7 @@ int qemu_savevm_state_begin(QEMUFile *f,
 int qemu_savevm_state_iterate(QEMUFile *f);
 int qemu_savevm_state_complete(QEMUFile *f);
 void qemu_savevm_state_cancel(QEMUFile *f);
+uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
 int qemu_loadvm_state(QEMUFile *f);
 
 /* SLIRP */
diff --git a/migration.c b/migration.c
index 11123bc..b6374ae 100644
--- a/migration.c
+++ b/migration.c
@@ -316,15 +316,17 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
     return ret;
 }
 
-void migrate_fd_put_ready(MigrationState *s)
+bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size)
 {
     int ret;
+    uint64_t pending_size;
+    bool last_round = false;
 
     qemu_mutex_lock_iothread();
     if (s->state != MIG_STATE_ACTIVE) {
         DPRINTF("put_ready returning because of non-active state\n");
         qemu_mutex_unlock_iothread();
-        return;
+        return false;
     }
     if (s->first_time) {
         s->first_time = false;
@@ -334,15 +336,19 @@ void migrate_fd_put_ready(MigrationState *s)
             DPRINTF("failed, %d\n", ret);
             migrate_fd_error(s);
             qemu_mutex_unlock_iothread();
-            return;
+            return false;
         }
     }
 
     DPRINTF("iterate\n");
-    ret = qemu_savevm_state_iterate(s->file);
-    if (ret < 0) {
-        migrate_fd_error(s);
-    } else if (ret == 1) {
+    pending_size = qemu_savevm_state_pending(s->file, max_size);
+    DPRINTF("pending size %lu max %lu\n", pending_size, max_size);
+    if (pending_size >= max_size) {
+        ret = qemu_savevm_state_iterate(s->file);
+        if (ret < 0) {
+            migrate_fd_error(s);
+        }
+    } else {
         int old_vm_running = runstate_is_running();
         int64_t start_time, end_time;
 
@@ -368,9 +374,11 @@ void migrate_fd_put_ready(MigrationState *s)
                 vm_start();
             }
         }
+        last_round = true;
     }
     qemu_mutex_unlock_iothread();
 
+    return last_round;
 }
 
 static void migrate_fd_cancel(MigrationState *s)
diff --git a/savevm.c b/savevm.c
index c93d0b4..bcdb92e 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1753,6 +1753,25 @@ int qemu_savevm_state_complete(QEMUFile *f)
     return qemu_file_get_error(f);
 }
 
+uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size)
+{
+    SaveStateEntry *se;
+    uint64_t ret = 0;
+
+    QTAILQ_FOREACH(se, &savevm_handlers, entry) {
+        if (!se->ops || !se->ops->save_live_pending) {
+            continue;
+        }
+        if (se->ops && se->ops->is_active) {
+            if (!se->ops->is_active(se->opaque)) {
+                continue;
+            }
+        }
+        ret += se->ops->save_live_pending(f, se->opaque, max_size);
+    }
+    return ret;
+}
+
 void qemu_savevm_state_cancel(QEMUFile *f)
 {
     SaveStateEntry *se;
commit f50b4986b261fc10065289d2a03deba24d824988
Author: Juan Quintela <quintela at redhat.com>
Date:   Tue Jul 24 14:24:08 2012 +0200

    buffered_file: unfold buffered_append in buffered_put_buffer
    
    It was the only user, and now buffered_put_buffer just does the append
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 1d7fa24..be9424b 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -41,22 +41,6 @@ typedef struct QEMUFileBuffered
     do { } while (0)
 #endif
 
-static void buffered_append(QEMUFileBuffered *s,
-                            const uint8_t *buf, size_t size)
-{
-    if (size > (s->buffer_capacity - s->buffer_size)) {
-        DPRINTF("increasing buffer capacity from %zu by %zu\n",
-                s->buffer_capacity, size + 1024);
-
-        s->buffer_capacity += size + 1024;
-
-        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
-    }
-
-    memcpy(s->buffer + s->buffer_size, buf, size);
-    s->buffer_size += size;
-}
-
 static ssize_t buffered_flush(QEMUFileBuffered *s)
 {
     size_t offset = 0;
@@ -101,11 +85,22 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
         return error;
     }
 
-    if (size > 0) {
-        DPRINTF("buffering %d bytes\n", size - offset);
-        buffered_append(s, buf, size);
+    if (size <= 0) {
+        return size;
     }
 
+    if (size > (s->buffer_capacity - s->buffer_size)) {
+        DPRINTF("increasing buffer capacity from %zu by %zu\n",
+                s->buffer_capacity, size + 1024);
+
+        s->buffer_capacity += size + 1024;
+
+        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
+    }
+
+    memcpy(s->buffer + s->buffer_size, buf, size);
+    s->buffer_size += size;
+
     return size;
 }
 
commit c518dd841deb85b3ccf77ff93e1142b27b06af32
Author: Juan Quintela <quintela at redhat.com>
Date:   Tue Jul 24 14:00:13 2012 +0200

    buffered_file: don't flush on put buffer
    
    We call buffered_put_buffer with the iothread lock held, and buffered_flush()
    does synchronous writes.  We only want to do the synchronous writes outside
    of that lock.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index d61d805..1d7fa24 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -106,12 +106,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
         buffered_append(s, buf, size);
     }
 
-    error = buffered_flush(s);
-    if (error < 0) {
-        DPRINTF("buffered flush error. bailing: %s\n", strerror(-error));
-        return error;
-    }
-
     return size;
 }
 
commit 78d1d231f889f7eae3835ddaec4373011792e46f
Author: Juan Quintela <quintela at redhat.com>
Date:   Tue Jul 24 13:22:18 2012 +0200

    buffered_file: Unfold the trick to restart generating migration data
    
    This was needed before due to the way that the callbacks worked.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 7743fbd..d61d805 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -112,14 +112,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
         return error;
     }
 
-    if (pos == 0 && size == 0) {
-        DPRINTF("file is ready\n");
-        if (s->bytes_xfer < s->xfer_limit) {
-            DPRINTF("notifying client\n");
-            migrate_fd_put_ready(s->migration_state);
-        }
-    }
-
     return size;
 }
 
@@ -222,8 +214,17 @@ static void *buffered_file_thread(void *opaque)
             /* usleep expects microseconds */
             g_usleep((expire_time - current_time)*1000);
         }
-        buffered_put_buffer(s, NULL, 0, 0);
+        if (buffered_flush(s) < 0) {
+            break;
+        }
+
+        DPRINTF("file is ready\n");
+        if (s->bytes_xfer < s->xfer_limit) {
+            DPRINTF("notifying client\n");
+            migrate_fd_put_ready(s->migration_state);
+        }
     }
+
     g_free(s->buffer);
     g_free(s);
     return NULL;
commit e76274824defce54a124e5104be3880044c698e1
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 06:31:30 2012 +0200

    migration: just lock migrate_fd_put_ready
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 14e3a6f..7743fbd 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -222,9 +222,7 @@ static void *buffered_file_thread(void *opaque)
             /* usleep expects microseconds */
             g_usleep((expire_time - current_time)*1000);
         }
-        qemu_mutex_lock_iothread();
         buffered_put_buffer(s, NULL, 0, 0);
-        qemu_mutex_unlock_iothread();
     }
     g_free(s->buffer);
     g_free(s);
diff --git a/migration.c b/migration.c
index 032c9c2..11123bc 100644
--- a/migration.c
+++ b/migration.c
@@ -320,8 +320,10 @@ void migrate_fd_put_ready(MigrationState *s)
 {
     int ret;
 
+    qemu_mutex_lock_iothread();
     if (s->state != MIG_STATE_ACTIVE) {
         DPRINTF("put_ready returning because of non-active state\n");
+        qemu_mutex_unlock_iothread();
         return;
     }
     if (s->first_time) {
@@ -331,6 +333,7 @@ void migrate_fd_put_ready(MigrationState *s)
         if (ret < 0) {
             DPRINTF("failed, %d\n", ret);
             migrate_fd_error(s);
+            qemu_mutex_unlock_iothread();
             return;
         }
     }
@@ -366,6 +369,8 @@ void migrate_fd_put_ready(MigrationState *s)
             }
         }
     }
+    qemu_mutex_unlock_iothread();
+
 }
 
 static void migrate_fd_cancel(MigrationState *s)
commit 188a428559f0cd0bde884d28b42e449abd744c2f
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 06:24:03 2012 +0200

    migration: remove unfreeze logic
    
    Now that we have a thread, and blocking writes, we don't need it.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index d61892b..14e3a6f 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -25,7 +25,6 @@ typedef struct QEMUFileBuffered
 {
     MigrationState *migration_state;
     QEMUFile *file;
-    int freeze_output;
     size_t bytes_xfer;
     size_t xfer_limit;
     uint8_t *buffer;
@@ -69,13 +68,6 @@ static ssize_t buffered_flush(QEMUFileBuffered *s)
         size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
         ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
                                     to_send);
-        if (ret == -EAGAIN) {
-            DPRINTF("backend not ready, freezing\n");
-            ret = 0;
-            s->freeze_output = 1;
-            break;
-        }
-
         if (ret <= 0) {
             DPRINTF("error flushing data, %zd\n", ret);
             break;
@@ -109,9 +101,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
         return error;
     }
 
-    DPRINTF("unfreezing output\n");
-    s->freeze_output = 0;
-
     if (size > 0) {
         DPRINTF("buffering %d bytes\n", size - offset);
         buffered_append(s, buf, size);
@@ -125,7 +114,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in
 
     if (pos == 0 && size == 0) {
         DPRINTF("file is ready\n");
-        if (!s->freeze_output && s->bytes_xfer < s->xfer_limit) {
+        if (s->bytes_xfer < s->xfer_limit) {
             DPRINTF("notifying client\n");
             migrate_fd_put_ready(s->migration_state);
         }
@@ -148,12 +137,6 @@ static int buffered_close(void *opaque)
         if (ret < 0) {
             break;
         }
-        if (s->freeze_output) {
-            ret = migrate_fd_wait_for_unfreeze(s->migration_state);
-            if (ret < 0) {
-                break;
-            }
-        }
     }
 
     ret2 = migrate_fd_close(s->migration_state);
@@ -187,8 +170,6 @@ static int buffered_rate_limit(void *opaque)
     if (ret) {
         return ret;
     }
-    if (s->freeze_output)
-        return 1;
 
     if (s->bytes_xfer > s->xfer_limit)
         return 1;
@@ -233,9 +214,6 @@ static void *buffered_file_thread(void *opaque)
         if (s->migration_state->complete) {
             break;
         }
-        if (s->freeze_output) {
-            continue;
-        }
         if (current_time >= expire_time) {
             s->bytes_xfer = 0;
             expire_time = current_time + BUFFER_DELAY;
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 0ce3720..92190f2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -82,7 +82,6 @@ void migrate_fd_connect(MigrationState *s);
 ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
                               size_t size);
 void migrate_fd_put_ready(MigrationState *s);
-int migrate_fd_wait_for_unfreeze(MigrationState *s);
 int migrate_fd_close(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
diff --git a/migration.c b/migration.c
index 207c754..032c9c2 100644
--- a/migration.c
+++ b/migration.c
@@ -382,29 +382,6 @@ static void migrate_fd_cancel(MigrationState *s)
     migrate_fd_cleanup(s);
 }
 
-int migrate_fd_wait_for_unfreeze(MigrationState *s)
-{
-    int ret;
-
-    DPRINTF("wait for unfreeze\n");
-    if (s->state != MIG_STATE_ACTIVE)
-        return -EINVAL;
-
-    do {
-        fd_set wfds;
-
-        FD_ZERO(&wfds);
-        FD_SET(s->fd, &wfds);
-
-        ret = select(s->fd + 1, NULL, &wfds, NULL, NULL);
-    } while (ret == -1 && (s->get_error(s)) == EINTR);
-
-    if (ret == -1) {
-        return -s->get_error(s);
-    }
-    return 0;
-}
-
 int migrate_fd_close(MigrationState *s)
 {
     int rc = 0;
commit dd217b8732b93d97c22fa70dc15a72d92a2b2380
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 06:15:02 2012 +0200

    migration: make writes blocking
    
    Move all the writes to the migration_thread, and make the writes
    blocking.  Notice that we are still using the iothread for everything
    that we do.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index d64bdbb..68deefb 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -113,11 +113,6 @@ int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
 int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_get_error(QEMUFile *f);
 
-/* Try to send any outstanding data.  This function is useful when output is
- * halted due to rate limiting or EAGAIN errors occur as it can be used to
- * resume output. */
-int qemu_file_put_notify(QEMUFile *f);
-
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
 {
     qemu_put_be64(f, *pv);
diff --git a/migration-exec.c b/migration-exec.c
index b7b760b..a051a6e 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -69,7 +69,6 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error
 
     s->fd = fileno(f);
     assert(s->fd != -1);
-    socket_set_nonblock(s->fd);
 
     s->opaque = qemu_popen(f, "w");
 
diff --git a/migration-fd.c b/migration-fd.c
index 5086a90..a99e0e3 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -75,7 +75,6 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **
         return;
     }
 
-    fcntl(s->fd, F_SETFL, O_NONBLOCK);
     s->get_error = fd_errno;
     s->write = fd_write;
     s->close = fd_close;
diff --git a/migration-tcp.c b/migration-tcp.c
index 1fca428..e78a296 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -60,6 +60,7 @@ static void tcp_wait_for_connect(int fd, void *opaque)
     } else {
         DPRINTF("migrate connect success\n");
         s->fd = fd;
+        socket_set_block(s->fd);
         migrate_fd_connect(s);
     }
 }
diff --git a/migration-unix.c b/migration-unix.c
index f2f368c..218835a 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -60,6 +60,7 @@ static void unix_wait_for_connect(int fd, void *opaque)
     } else {
         DPRINTF("migrate connect success\n");
         s->fd = fd;
+        socket_set_block(s->fd);
         migrate_fd_connect(s);
     }
 }
diff --git a/migration.c b/migration.c
index 249eea3..207c754 100644
--- a/migration.c
+++ b/migration.c
@@ -297,18 +297,6 @@ static void migrate_fd_completed(MigrationState *s)
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
-static void migrate_fd_put_notify(void *opaque)
-{
-    MigrationState *s = opaque;
-    int ret;
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-    ret = qemu_file_put_notify(s->file);
-    if (ret) {
-        migrate_fd_error(s);
-    }
-}
-
 ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
                               size_t size)
 {
@@ -325,10 +313,6 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
     if (ret == -1)
         ret = -(s->get_error(s));
 
-    if (ret == -EAGAIN) {
-        qemu_set_fd_handler2(s->fd, NULL, NULL, migrate_fd_put_notify, s);
-    }
-
     return ret;
 }
 
@@ -425,7 +409,6 @@ int migrate_fd_close(MigrationState *s)
 {
     int rc = 0;
     if (s->fd != -1) {
-        qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
         rc = s->close(s);
         s->fd = -1;
     }
diff --git a/savevm.c b/savevm.c
index b2a844f..c93d0b4 100644
--- a/savevm.c
+++ b/savevm.c
@@ -555,11 +555,6 @@ int qemu_fclose(QEMUFile *f)
     return ret;
 }
 
-int qemu_file_put_notify(QEMUFile *f)
-{
-    return f->ops->put_buffer(f->opaque, NULL, 0, 0);
-}
-
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
 {
     int l;
commit 766bd1769e70835e0cc25f3f057f101619494b59
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 05:45:29 2012 +0200

    migration: move migration thread init code to migrate_fd_put_ready
    
    This way everything related to migration is run on the migration
    thread and no locking is needed.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/include/migration/migration.h b/include/migration/migration.h
index fa1fb8f..0ce3720 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -46,6 +46,7 @@ struct MigrationState
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
     int64_t xbzrle_cache_size;
     bool complete;
+    bool first_time;
 };
 
 void process_incoming_migration(QEMUFile *f);
diff --git a/migration.c b/migration.c
index 5450c3b..249eea3 100644
--- a/migration.c
+++ b/migration.c
@@ -21,6 +21,7 @@
 #include "block/block.h"
 #include "qemu/sockets.h"
 #include "migration/block.h"
+#include "qemu/thread.h"
 #include "qmp-commands.h"
 
 //#define DEBUG_MIGRATION
@@ -339,6 +340,16 @@ void migrate_fd_put_ready(MigrationState *s)
         DPRINTF("put_ready returning because of non-active state\n");
         return;
     }
+    if (s->first_time) {
+        s->first_time = false;
+        DPRINTF("beginning savevm\n");
+        ret = qemu_savevm_state_begin(s->file, &s->params);
+        if (ret < 0) {
+            DPRINTF("failed, %d\n", ret);
+            migrate_fd_error(s);
+            return;
+        }
+    }
 
     DPRINTF("iterate\n");
     ret = qemu_savevm_state_iterate(s->file);
@@ -351,7 +362,11 @@ void migrate_fd_put_ready(MigrationState *s)
         DPRINTF("done iterating\n");
         start_time = qemu_get_clock_ms(rt_clock);
         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-        vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+        if (old_vm_running) {
+            vm_stop(RUN_STATE_FINISH_MIGRATE);
+        } else {
+            vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+        }
 
         if (qemu_savevm_state_complete(s->file) < 0) {
             migrate_fd_error(s);
@@ -445,19 +460,9 @@ bool migration_has_failed(MigrationState *s)
 
 void migrate_fd_connect(MigrationState *s)
 {
-    int ret;
-
     s->state = MIG_STATE_ACTIVE;
+    s->first_time = true;
     qemu_fopen_ops_buffered(s);
-
-    DPRINTF("beginning savevm\n");
-    ret = qemu_savevm_state_begin(s->file, &s->params);
-    if (ret < 0) {
-        DPRINTF("failed, %d\n", ret);
-        migrate_fd_error(s);
-        return;
-    }
-    migrate_fd_put_ready(s);
 }
 
 static MigrationState *migrate_init(const MigrationParams *params)
commit edfa1af52f4c69264c5a0c38da10eb372077fba3
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 02:13:23 2012 +0200

    migration: make qemu_fopen_ops_buffered() return void
    
    We want the file assignment to happen before the thread is created to
    avoid locking, so we just do it before creating the thread.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Orit Wasserman <owasserm at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 7059099..d61892b 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -32,7 +32,6 @@ typedef struct QEMUFileBuffered
     size_t buffer_size;
     size_t buffer_capacity;
     QemuThread thread;
-    bool complete;
 } QEMUFileBuffered;
 
 #ifdef DEBUG_BUFFERED_FILE
@@ -162,7 +161,7 @@ static int buffered_close(void *opaque)
         ret = ret2;
     }
     ret = migrate_fd_close(s->migration_state);
-    s->complete = true;
+    s->migration_state->complete = true;
     return ret;
 }
 
@@ -231,7 +230,7 @@ static void *buffered_file_thread(void *opaque)
     while (true) {
         int64_t current_time = qemu_get_clock_ms(rt_clock);
 
-        if (s->complete) {
+        if (s->migration_state->complete) {
             break;
         }
         if (s->freeze_output) {
@@ -263,7 +262,7 @@ static const QEMUFileOps buffered_file_ops = {
     .set_rate_limit = buffered_set_rate_limit,
 };
 
-QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
+void qemu_fopen_ops_buffered(MigrationState *migration_state)
 {
     QEMUFileBuffered *s;
 
@@ -271,12 +270,12 @@ QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
 
     s->migration_state = migration_state;
     s->xfer_limit = migration_state->bandwidth_limit / 10;
-    s->complete = false;
+    s->migration_state->complete = false;
 
     s->file = qemu_fopen_ops(s, &buffered_file_ops);
 
+    migration_state->file = s->file;
+
     qemu_thread_create(&s->thread, buffered_file_thread, s,
                        QEMU_THREAD_DETACHED);
-
-    return s->file;
 }
diff --git a/buffered_file.h b/buffered_file.h
index 86a7075..d278f3d 100644
--- a/buffered_file.h
+++ b/buffered_file.h
@@ -17,6 +17,6 @@
 #include "hw/hw.h"
 #include "migration/migration.h"
 
-QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state);
+void qemu_fopen_ops_buffered(MigrationState *migration_state);
 
 #endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 8b7af61..fa1fb8f 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -45,6 +45,7 @@ struct MigrationState
     int64_t dirty_pages_rate;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
     int64_t xbzrle_cache_size;
+    bool complete;
 };
 
 void process_incoming_migration(QEMUFile *f);
diff --git a/migration.c b/migration.c
index e560930..5450c3b 100644
--- a/migration.c
+++ b/migration.c
@@ -448,7 +448,7 @@ void migrate_fd_connect(MigrationState *s)
     int ret;
 
     s->state = MIG_STATE_ACTIVE;
-    s->file = qemu_fopen_ops_buffered(s);
+    qemu_fopen_ops_buffered(s);
 
     DPRINTF("beginning savevm\n");
     ret = qemu_savevm_state_begin(s->file, &s->params);
commit c09f4cb2b3243085a86aee3c7ed4f31c77e4db87
Author: Juan Quintela <quintela at redhat.com>
Date:   Mon Jul 23 01:56:50 2012 +0200

    buffered_file: Move from using a timer to use a thread
    
    We still protect everything except the wait with the iothread lock.
    But we moved from a timer to a thread.  One step at a time.
    
    We also need a variable, "complete", to detect when we have finished.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 1de47e0..7059099 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -17,6 +17,7 @@
 #include "hw/hw.h"
 #include "qemu/timer.h"
 #include "buffered_file.h"
+#include "qemu/thread.h"
 
 //#define DEBUG_BUFFERED_FILE
 
@@ -30,7 +31,8 @@ typedef struct QEMUFileBuffered
     uint8_t *buffer;
     size_t buffer_size;
     size_t buffer_capacity;
-    QEMUTimer *timer;
+    QemuThread thread;
+    bool complete;
 } QEMUFileBuffered;
 
 #ifdef DEBUG_BUFFERED_FILE
@@ -159,11 +161,8 @@ static int buffered_close(void *opaque)
     if (ret >= 0) {
         ret = ret2;
     }
-    qemu_del_timer(s->timer);
-    qemu_free_timer(s->timer);
-    g_free(s->buffer);
-    g_free(s);
-
+    ret = migrate_fd_close(s->migration_state);
+    s->complete = true;
     return ret;
 }
 
@@ -221,23 +220,38 @@ static int64_t buffered_get_rate_limit(void *opaque)
     return s->xfer_limit;
 }
 
-static void buffered_rate_tick(void *opaque)
+/* 100ms  xfer_limit is the limit that we should write each 100ms */
+#define BUFFER_DELAY 100
+
+static void *buffered_file_thread(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
+    int64_t expire_time = qemu_get_clock_ms(rt_clock) + BUFFER_DELAY;
 
-    if (qemu_file_get_error(s->file)) {
-        buffered_close(s);
-        return;
-    }
-
-    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
-
-    if (s->freeze_output)
-        return;
-
-    s->bytes_xfer = 0;
+    while (true) {
+        int64_t current_time = qemu_get_clock_ms(rt_clock);
 
-    buffered_put_buffer(s, NULL, 0, 0);
+        if (s->complete) {
+            break;
+        }
+        if (s->freeze_output) {
+            continue;
+        }
+        if (current_time >= expire_time) {
+            s->bytes_xfer = 0;
+            expire_time = current_time + BUFFER_DELAY;
+        }
+        if (s->bytes_xfer >= s->xfer_limit) {
+            /* usleep expects microseconds */
+            g_usleep((expire_time - current_time)*1000);
+        }
+        qemu_mutex_lock_iothread();
+        buffered_put_buffer(s, NULL, 0, 0);
+        qemu_mutex_unlock_iothread();
+    }
+    g_free(s->buffer);
+    g_free(s);
+    return NULL;
 }
 
 static const QEMUFileOps buffered_file_ops = {
@@ -257,12 +271,12 @@ QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
 
     s->migration_state = migration_state;
     s->xfer_limit = migration_state->bandwidth_limit / 10;
+    s->complete = false;
 
     s->file = qemu_fopen_ops(s, &buffered_file_ops);
 
-    s->timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s);
-
-    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
+    qemu_thread_create(&s->thread, buffered_file_thread, s,
+                       QEMU_THREAD_DETACHED);
 
     return s->file;
 }
commit b2a8658ef5dc57ea9e7a45091724a719dd4bdcd3
Author: Umesh Deshpande <udeshpan at redhat.com>
Date:   Wed Aug 17 00:01:33 2011 -0700

    protect the ramlist with a separate mutex
    
    Add a new mutex that protects shared state between ram_save_live
    and the iothread.  If the iothread mutex has to be taken together
    with the ramlist mutex, the iothread mutex shall always be the
    _outer_ one (taken first, released last).
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Umesh Deshpande <udeshpan at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Orit Wasserman <owasserm at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 1f737dc..fad1c40 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -528,7 +528,6 @@ static void ram_migration_cancel(void *opaque)
     migration_end();
 }
 
-
 static void reset_ram_globals(void)
 {
     last_block = NULL;
@@ -547,6 +546,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
 
+    qemu_mutex_lock_ramlist();
     bytes_transferred = 0;
     reset_ram_globals();
 
@@ -574,6 +574,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         qemu_put_be64(f, block->length);
     }
 
+    qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -588,6 +589,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     uint64_t expected_downtime;
     MigrationState *s = migrate_get_current();
 
+    qemu_mutex_lock_ramlist();
+
     if (ram_list.version != last_version) {
         reset_ram_globals();
     }
@@ -636,6 +639,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         bwidth = 0.000001;
     }
 
+    qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
@@ -656,6 +660,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 {
     migration_bitmap_sync();
 
+    qemu_mutex_lock_ramlist();
+
     /* try transferring iterative blocks of memory */
 
     /* flush all remaining blocks regardless of rate limiting */
@@ -671,6 +677,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     }
     migration_end();
 
+    qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
diff --git a/exec.c b/exec.c
index 8478bef..a6923ad 100644
--- a/exec.c
+++ b/exec.c
@@ -213,6 +213,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 void cpu_exec_init_all(void)
 {
 #if !defined(CONFIG_USER_ONLY)
+    qemu_mutex_init(&ram_list.mutex);
     memory_map_init();
     io_mem_init();
 #endif
@@ -801,6 +802,16 @@ void qemu_flush_coalesced_mmio_buffer(void)
         kvm_flush_coalesced_mmio_buffer();
 }
 
+void qemu_mutex_lock_ramlist(void)
+{
+    qemu_mutex_lock(&ram_list.mutex);
+}
+
+void qemu_mutex_unlock_ramlist(void)
+{
+    qemu_mutex_unlock(&ram_list.mutex);
+}
+
 #if defined(__linux__) && !defined(TARGET_S390X)
 
 #include <sys/vfs.h>
@@ -982,6 +993,8 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
@@ -989,6 +1002,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
             abort();
         }
     }
+    qemu_mutex_unlock_ramlist();
 }
 
 static int memory_try_enable_merging(void *addr, size_t len)
@@ -1012,6 +1026,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
 
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
     new_block->mr = mr;
     new_block->offset = find_ram_offset(size);
     if (host) {
@@ -1057,6 +1073,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     ram_list.mru_block = NULL;
 
     ram_list.version++;
+    qemu_mutex_unlock_ramlist();
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
@@ -1082,21 +1099,26 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QTAILQ_REMOVE(&ram_list.blocks, block, next);
             ram_list.mru_block = NULL;
             ram_list.version++;
             g_free(block);
-            return;
+            break;
         }
     }
+    qemu_mutex_unlock_ramlist();
 }
 
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    /* This assumes the iothread lock is taken here too.  */
+    qemu_mutex_lock_ramlist();
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QTAILQ_REMOVE(&ram_list.blocks, block, next);
@@ -1127,9 +1149,10 @@ void qemu_ram_free(ram_addr_t addr)
 #endif
             }
             g_free(block);
-            return;
+            break;
         }
     }
+    qemu_mutex_unlock_ramlist();
 
 }
 
@@ -1207,6 +1230,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    /* The list is protected by the iothread lock here.  */
     block = ram_list.mru_block;
     if (block && addr - block->offset < block->length) {
         goto found;
@@ -1246,6 +1270,7 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    /* The list is protected by the iothread lock here.  */
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             if (xen_enabled()) {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index f052b38..439e88d 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -22,6 +22,7 @@
 #include "qemu-common.h"
 #include "qemu/tls.h"
 #include "exec/cpu-common.h"
+#include "qemu/thread.h"
 
 /* some important defines:
  *
@@ -487,6 +488,9 @@ typedef struct RAMBlock {
     ram_addr_t length;
     uint32_t flags;
     char idstr[256];
+    /* Reads can take either the iothread or the ramlist lock.
+     * Writes must take both locks.
+     */
     QTAILQ_ENTRY(RAMBlock) next;
 #if defined(__linux__) && !defined(TARGET_S390X)
     int fd;
@@ -494,8 +498,11 @@ typedef struct RAMBlock {
 } RAMBlock;
 
 typedef struct RAMList {
+    QemuMutex mutex;
+    /* Protected by the iothread lock.  */
     uint8_t *phys_dirty;
     RAMBlock *mru_block;
+    /* Protected by the ramlist lock.  */
     QTAILQ_HEAD(, RAMBlock) blocks;
     uint32_t version;
 } RAMList;
@@ -516,6 +523,8 @@ extern int mem_prealloc;
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 ram_addr_t last_ram_offset(void);
+void qemu_mutex_lock_ramlist(void);
+void qemu_mutex_unlock_ramlist(void);
 #endif /* !CONFIG_USER_ONLY */
 
 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
commit f798b07f517143df3a1e38bccc3f72ade2f080dc
Author: Umesh Deshpande <udeshpan at redhat.com>
Date:   Thu Aug 18 11:41:17 2011 -0700

    add a version number to ram_list
    
    This will be used to detect if last_block might have become invalid
    across different calls to ram_save_live.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Umesh Deshpande <udeshpan at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>
    
    Reviewed-by: Orit Wasserman <owasserm at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 5c8df3a..1f737dc 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -336,6 +336,7 @@ static RAMBlock *last_block;
 static ram_addr_t last_offset;
 static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
+static uint32_t last_version;
 
 static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
                                                          ram_addr_t offset)
@@ -406,7 +407,6 @@ static void migration_bitmap_sync(void)
     }
 }
 
-
 /*
  * ram_save_block: Writes a page of memory to the stream f
  *
@@ -533,6 +533,7 @@ static void reset_ram_globals(void)
 {
     last_block = NULL;
     last_offset = 0;
+    last_version = ram_list.version;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -587,6 +588,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     uint64_t expected_downtime;
     MigrationState *s = migrate_get_current();
 
+    if (ram_list.version != last_version) {
+        reset_ram_globals();
+    }
+
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
diff --git a/exec.c b/exec.c
index 1ee4fa6..8478bef 100644
--- a/exec.c
+++ b/exec.c
@@ -1056,6 +1056,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     }
     ram_list.mru_block = NULL;
 
+    ram_list.version++;
+
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
@@ -1084,6 +1086,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
         if (addr == block->offset) {
             QTAILQ_REMOVE(&ram_list.blocks, block, next);
             ram_list.mru_block = NULL;
+            ram_list.version++;
             g_free(block);
             return;
         }
@@ -1098,6 +1101,7 @@ void qemu_ram_free(ram_addr_t addr)
         if (addr == block->offset) {
             QTAILQ_REMOVE(&ram_list.blocks, block, next);
             ram_list.mru_block = NULL;
+            ram_list.version++;
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else if (mem_path) {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 612e950..f052b38 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -497,6 +497,7 @@ typedef struct RAMList {
     uint8_t *phys_dirty;
     RAMBlock *mru_block;
     QTAILQ_HEAD(, RAMBlock) blocks;
+    uint32_t version;
 } RAMList;
 extern RAMList ram_list;
 
commit abb26d63e7e4492d306c13b7e0e799d4c11a067c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Nov 14 16:00:51 2012 +0100

    exec: sort the memory from biggest to smallest
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 67b14d2..5c8df3a 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -505,35 +505,6 @@ uint64_t ram_bytes_total(void)
     return total;
 }
 
-static int block_compar(const void *a, const void *b)
-{
-    RAMBlock * const *ablock = a;
-    RAMBlock * const *bblock = b;
-
-    return strcmp((*ablock)->idstr, (*bblock)->idstr);
-}
-
-static void sort_ram_list(void)
-{
-    RAMBlock *block, *nblock, **blocks;
-    int n;
-    n = 0;
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        ++n;
-    }
-    blocks = g_malloc(n * sizeof *blocks);
-    n = 0;
-    QTAILQ_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
-        blocks[n++] = block;
-        QTAILQ_REMOVE(&ram_list.blocks, block, next);
-    }
-    qsort(blocks, n, sizeof *blocks, block_compar);
-    while (--n >= 0) {
-        QTAILQ_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
-    }
-    g_free(blocks);
-}
-
 static void migration_end(void)
 {
     if (migration_bitmap) {
@@ -562,7 +533,6 @@ static void reset_ram_globals(void)
 {
     last_block = NULL;
     last_offset = 0;
-    sort_ram_list();
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
diff --git a/exec.c b/exec.c
index 584279a..1ee4fa6 100644
--- a/exec.c
+++ b/exec.c
@@ -1007,7 +1007,7 @@ static int memory_try_enable_merging(void *addr, size_t len)
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
-    RAMBlock *new_block;
+    RAMBlock *block, *new_block;
 
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
@@ -1043,7 +1043,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     }
     new_block->length = size;
 
-    QTAILQ_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    /* Keep the list sorted from biggest to smallest block.  */
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (block->length < new_block->length) {
+            break;
+        }
+    }
+    if (block) {
+        QTAILQ_INSERT_BEFORE(block, new_block, next);
+    } else {
+        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
+    }
     ram_list.mru_block = NULL;
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
commit a3161038a1fd17a638a0c606f71e1f799f65f41b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Nov 14 15:54:48 2012 +0100

    exec: change RAM list to a TAILQ
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index 6695ccf..67b14d2 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -382,7 +382,7 @@ static void migration_bitmap_sync(void)
     trace_migration_bitmap_sync_start();
     memory_global_sync_dirty_bitmap(get_system_memory());
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
             if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
                                         DIRTY_MEMORY_MIGRATION)) {
@@ -424,7 +424,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
     ram_addr_t current_addr;
 
     if (!block)
-        block = QLIST_FIRST(&ram_list.blocks);
+        block = QTAILQ_FIRST(&ram_list.blocks);
 
     do {
         mr = block->mr;
@@ -465,9 +465,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         offset += TARGET_PAGE_SIZE;
         if (offset >= block->length) {
             offset = 0;
-            block = QLIST_NEXT(block, next);
+            block = QTAILQ_NEXT(block, next);
             if (!block)
-                block = QLIST_FIRST(&ram_list.blocks);
+                block = QTAILQ_FIRST(&ram_list.blocks);
         }
     } while (block != last_block || offset != last_offset);
 
@@ -499,7 +499,7 @@ uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next)
+    QTAILQ_FOREACH(block, &ram_list.blocks, next)
         total += block->length;
 
     return total;
@@ -518,18 +518,18 @@ static void sort_ram_list(void)
     RAMBlock *block, *nblock, **blocks;
     int n;
     n = 0;
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         ++n;
     }
     blocks = g_malloc(n * sizeof *blocks);
     n = 0;
-    QLIST_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
+    QTAILQ_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
         blocks[n++] = block;
-        QLIST_REMOVE(block, next);
+        QTAILQ_REMOVE(&ram_list.blocks, block, next);
     }
     qsort(blocks, n, sizeof *blocks, block_compar);
     while (--n >= 0) {
-        QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
+        QTAILQ_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
     }
     g_free(blocks);
 }
@@ -597,7 +597,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->length);
@@ -763,7 +763,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     qemu_get_buffer(f, (uint8_t *)id, len);
     id[len] = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (!strncmp(id, block->idstr, sizeof(id)))
             return memory_region_get_ram_ptr(block->mr) + offset;
     }
@@ -807,7 +807,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                     id[len] = 0;
                     length = qemu_get_be64(f);
 
-                    QLIST_FOREACH(block, &ram_list.blocks, next) {
+                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
                             if (block->length != length) {
                                 ret =  -EINVAL;
diff --git a/dump.c b/dump.c
index a26b1a5..4ed1fa8 100644
--- a/dump.c
+++ b/dump.c
@@ -427,7 +427,7 @@ static hwaddr get_offset(hwaddr phys_addr,
         }
     }
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (s->has_filter) {
             if (block->offset >= s->begin + s->length ||
                 block->offset + block->length <= s->begin) {
@@ -594,7 +594,7 @@ static int dump_completed(DumpState *s)
 static int get_next_block(DumpState *s, RAMBlock *block)
 {
     while (1) {
-        block = QLIST_NEXT(block, next);
+        block = QTAILQ_NEXT(block, next);
         if (!block) {
             /* no more block */
             return 1;
@@ -670,11 +670,11 @@ static ram_addr_t get_start_block(DumpState *s)
     RAMBlock *block;
 
     if (!s->has_filter) {
-        s->block = QLIST_FIRST(&ram_list.blocks);
+        s->block = QTAILQ_FIRST(&ram_list.blocks);
         return 0;
     }
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (block->offset >= s->begin + s->length ||
             block->offset + block->length <= s->begin) {
             /* This block is out of the range */
diff --git a/exec.c b/exec.c
index 78bae1c..584279a 100644
--- a/exec.c
+++ b/exec.c
@@ -57,7 +57,7 @@
 int phys_ram_fd;
 static int in_migration;
 
-RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
+RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
 
 static MemoryRegion *system_memory;
 static MemoryRegion *system_io;
@@ -902,15 +902,15 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
     RAMBlock *block, *next_block;
     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
 
-    if (QLIST_EMPTY(&ram_list.blocks))
+    if (QTAILQ_EMPTY(&ram_list.blocks))
         return 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         ram_addr_t end, next = RAM_ADDR_MAX;
 
         end = block->offset + block->length;
 
-        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
             if (next_block->offset >= end) {
                 next = MIN(next, next_block->offset);
             }
@@ -935,7 +935,7 @@ ram_addr_t last_ram_offset(void)
     RAMBlock *block;
     ram_addr_t last = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next)
+    QTAILQ_FOREACH(block, &ram_list.blocks, next)
         last = MAX(last, block->offset + block->length);
 
     return last;
@@ -964,7 +964,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     RAMBlock *new_block, *block;
 
     new_block = NULL;
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (block->offset == addr) {
             new_block = block;
             break;
@@ -982,7 +982,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                     new_block->idstr);
@@ -1043,7 +1043,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     }
     new_block->length = size;
 
-    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    QTAILQ_INSERT_HEAD(&ram_list.blocks, new_block, next);
     ram_list.mru_block = NULL;
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
@@ -1070,9 +1070,9 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
-            QLIST_REMOVE(block, next);
+            QTAILQ_REMOVE(&ram_list.blocks, block, next);
             ram_list.mru_block = NULL;
             g_free(block);
             return;
@@ -1084,9 +1084,9 @@ void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
-            QLIST_REMOVE(block, next);
+            QTAILQ_REMOVE(&ram_list.blocks, block, next);
             ram_list.mru_block = NULL;
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
@@ -1127,7 +1127,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     int flags;
     void *area, *vaddr;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->length) {
             vaddr = block->host + offset;
@@ -1197,7 +1197,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
     if (block && addr - block->offset < block->length) {
         goto found;
     }
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             goto found;
         }
@@ -1232,7 +1232,7 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
             if (xen_enabled()) {
                 /* We need to check if the requested address is in the RAM
@@ -1268,7 +1268,7 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
     } else {
         RAMBlock *block;
 
-        QLIST_FOREACH(block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
             if (addr - block->offset < block->length) {
                 if (addr - block->offset + *size > block->length)
                     *size = block->length - addr + block->offset;
@@ -1296,7 +1296,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
             continue;
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 9fe6fc0..612e950 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -487,7 +487,7 @@ typedef struct RAMBlock {
     ram_addr_t length;
     uint32_t flags;
     char idstr[256];
-    QLIST_ENTRY(RAMBlock) next;
+    QTAILQ_ENTRY(RAMBlock) next;
 #if defined(__linux__) && !defined(TARGET_S390X)
     int fd;
 #endif
@@ -496,7 +496,7 @@ typedef struct RAMBlock {
 typedef struct RAMList {
     uint8_t *phys_dirty;
     RAMBlock *mru_block;
-    QLIST_HEAD(, RAMBlock) blocks;
+    QTAILQ_HEAD(, RAMBlock) blocks;
 } RAMList;
 extern RAMList ram_list;
 
diff --git a/memory_mapping.c b/memory_mapping.c
index 530f1d6..ff45b3a 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -200,7 +200,7 @@ int qemu_get_guest_memory_mapping(MemoryMappingList *list)
      * If the guest doesn't use paging, the virtual address is equal to physical
      * address.
      */
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         offset = block->offset;
         length = block->length;
         create_new_memory_mapping(list, offset, offset, length);
@@ -213,7 +213,7 @@ void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
 {
     RAMBlock *block;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         create_new_memory_mapping(list, block->offset, 0, block->length);
     }
 }
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index 8209ce9..2cd2f7f 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -403,7 +403,7 @@ int cpu_get_dump_info(ArchDumpInfo *info)
     } else {
         info->d_class = ELFCLASS32;
 
-        QLIST_FOREACH(block, &ram_list.blocks, next) {
+        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
             if (block->offset + block->length > UINT_MAX) {
                 /* The memory size is greater than 4G */
                 info->d_class = ELFCLASS64;
commit 0d6d3c87a232cc27641dde3491d75c8021745d02
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Nov 14 15:45:02 2012 +0100

    exec: change ramlist from MRU order to a 1-item cache
    
    Most of the time, only 2 items will be active (from/to for a string operation,
    or code/data).  But TCG guests likely won't have gigabytes of memory, so
    this actually goes down to 1 item.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index e01f0e5..6695ccf 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -48,6 +48,7 @@
 #include "qemu/config-file.h"
 #include "qmp-commands.h"
 #include "trace.h"
+#include "exec/cpu-all.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
diff --git a/exec.c b/exec.c
index 28abd7e..78bae1c 100644
--- a/exec.c
+++ b/exec.c
@@ -43,6 +43,7 @@
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
 #endif
+#include "exec/cpu-all.h"
 
 #include "exec/cputlb.h"
 #include "translate-all.h"
@@ -1043,6 +1044,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
     new_block->length = size;
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
+    ram_list.mru_block = NULL;
 
     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
@@ -1071,6 +1073,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
+            ram_list.mru_block = NULL;
             g_free(block);
             return;
         }
@@ -1084,6 +1087,7 @@ void qemu_ram_free(ram_addr_t addr)
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
             QLIST_REMOVE(block, next);
+            ram_list.mru_block = NULL;
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else if (mem_path) {
@@ -1189,37 +1193,40 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
+    block = ram_list.mru_block;
+    if (block && addr - block->offset < block->length) {
+        goto found;
+    }
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->length) {
-            /* Move this entry to to start of the list.  */
-            if (block != QLIST_FIRST(&ram_list.blocks)) {
-                QLIST_REMOVE(block, next);
-                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
-            }
-            if (xen_enabled()) {
-                /* We need to check if the requested address is in the RAM
-                 * because we don't want to map the entire memory in QEMU.
-                 * In that case just map until the end of the page.
-                 */
-                if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
-                } else if (block->host == NULL) {
-                    block->host =
-                        xen_map_cache(block->offset, block->length, 1);
-                }
-            }
-            return block->host + (addr - block->offset);
+            goto found;
         }
     }
 
     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
     abort();
 
-    return NULL;
+found:
+    ram_list.mru_block = block;
+    if (xen_enabled()) {
+        /* We need to check if the requested address is in the RAM
+         * because we don't want to map the entire memory in QEMU.
+         * In that case just map until the end of the page.
+         */
+        if (block->offset == 0) {
+            return xen_map_cache(addr, 0, 0);
+        } else if (block->host == NULL) {
+            block->host =
+                xen_map_cache(block->offset, block->length, 1);
+        }
+    }
+    return block->host + (addr - block->offset);
 }
 
-/* Return a host pointer to ram allocated with qemu_ram_alloc.
- * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
+/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
+ * qemu_get_ram_ptr but do not touch ram_list.mru_block.
+ *
+ * ??? Is this still necessary?
  */
 static void *qemu_safe_ram_ptr(ram_addr_t addr)
 {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index c12e35f..9fe6fc0 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -495,6 +495,7 @@ typedef struct RAMBlock {
 
 typedef struct RAMList {
     uint8_t *phys_dirty;
+    RAMBlock *mru_block;
     QLIST_HEAD(, RAMBlock) blocks;
 } RAMList;
 extern RAMList ram_list;
commit 803ef03257a9ee375f08ca7a89e009ea12bc17a4
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 14:08:05 2012 +0200

    migration-fd: remove duplicate include
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration-fd.c b/migration-fd.c
index c45c42e..5086a90 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -19,7 +19,6 @@
 #include "monitor/monitor.h"
 #include "migration/qemu-file.h"
 #include "block/block.h"
-#include "qemu/sockets.h"
 
 //#define DEBUG_MIGRATION_FD
 
commit 557ec5a001740d234e2b9604f0697a0d52ae90ca
Author: Juan Quintela <quintela at redhat.com>
Date:   Wed Oct 3 14:07:31 2012 +0200

    migration: include qemu-file.h
    
    They don't use/know anything about buffered-file.
    
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration-exec.c b/migration-exec.c
index 68f36f4..b7b760b 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -18,7 +18,7 @@
 #include "qemu-common.h"
 #include "qemu/sockets.h"
 #include "migration/migration.h"
-#include "buffered_file.h"
+#include "migration/qemu-file.h"
 #include "block/block.h"
 #include <sys/types.h>
 #include <sys/wait.h>
diff --git a/migration-fd.c b/migration-fd.c
index ea121bc..c45c42e 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -17,7 +17,7 @@
 #include "qemu/sockets.h"
 #include "migration/migration.h"
 #include "monitor/monitor.h"
-#include "buffered_file.h"
+#include "migration/qemu-file.h"
 #include "block/block.h"
 #include "qemu/sockets.h"
 
diff --git a/migration-tcp.c b/migration-tcp.c
index 3c4c315..1fca428 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -16,7 +16,7 @@
 #include "qemu-common.h"
 #include "qemu/sockets.h"
 #include "migration/migration.h"
-#include "buffered_file.h"
+#include "migration/qemu-file.h"
 #include "block/block.h"
 
 //#define DEBUG_MIGRATION_TCP
diff --git a/migration-unix.c b/migration-unix.c
index d5f9868..f2f368c 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -16,7 +16,7 @@
 #include "qemu-common.h"
 #include "qemu/sockets.h"
 #include "migration/migration.h"
-#include "buffered_file.h"
+#include "migration/qemu-file.h"
 #include "block/block.h"
 
 //#define DEBUG_MIGRATION_UNIX
commit 24ea1e4b4b79cef2bac6f8e0f0a212f42ef420a9
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Sat Nov 10 18:58:40 2012 +0100

    migration: remove double call to migrate_fd_close
    
    The call in buffered_close is enough, because buffered_close is called
    already by migrate_fd_cleanup.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/migration.c b/migration.c
index 810f25e..e560930 100644
--- a/migration.c
+++ b/migration.c
@@ -272,7 +272,7 @@ static int migrate_fd_cleanup(MigrationState *s)
         s->file = NULL;
     }
 
-    migrate_fd_close(s);
+    assert(s->fd == -1);
     return ret;
 }
 
commit bde54c08b4854aceee3dee25121a2b835cb81166
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Nov 20 12:48:19 2012 +0100

    buffered_file: do not send more than s->bytes_xfer bytes per tick
    
    Sending more was possible if the buffer was large.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/buffered_file.c b/buffered_file.c
index 27627a1..1de47e0 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -65,9 +65,9 @@ static ssize_t buffered_flush(QEMUFileBuffered *s)
     DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
 
     while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
-
+        size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer);
         ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
-                                    s->buffer_size - offset);
+                                    to_send);
         if (ret == -EAGAIN) {
             DPRINTF("backend not ready, freezing\n");
             ret = 0;
commit 244eaa7514a944b36273eb8428f32da8e9124fcf
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Dec 12 12:54:43 2012 +0100

    migration: fix migration_bitmap leak
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Juan Quintela <quintela at redhat.com>

diff --git a/arch_init.c b/arch_init.c
index a8b6541..e01f0e5 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -535,9 +535,13 @@ static void sort_ram_list(void)
 
 static void migration_end(void)
 {
-    memory_global_dirty_log_stop();
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;
+    }
 
-    if (migrate_use_xbzrle()) {
+    if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
         g_free(XBZRLE.cache);
         g_free(XBZRLE.encoded_buf);
@@ -689,13 +693,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
         }
         bytes_transferred += bytes_sent;
     }
-    memory_global_dirty_log_stop();
+    migration_end();
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
-    g_free(migration_bitmap);
-    migration_bitmap = NULL;
-
     return 0;
 }
 
commit d2a0ccc613ccc48c7240f99e1ce05e0acce6e2a1
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Dec 17 13:01:07 2012 +0200

    virtio: make bindings typesafe
    
    Move bindings from opaque to DeviceState.
    This gives us better type safety with no performance cost.
    Add macros to make future QOM work easier.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 963b4f0..84fba96 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -136,7 +136,7 @@ static int s390_virtio_device_init(VirtIOS390Device *dev, VirtIODevice *vdev)
 
     bus->dev_offs += dev_len;
 
-    virtio_bind_device(vdev, &virtio_s390_bindings, dev);
+    virtio_bind_device(vdev, &virtio_s390_bindings, DEVICE(dev));
     dev->host_features = vdev->get_features(vdev, dev->host_features);
     s390_virtio_device_sync(dev);
     s390_virtio_reset_idx(dev);
@@ -363,9 +363,23 @@ VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem)
     return NULL;
 }
 
-static void virtio_s390_notify(void *opaque, uint16_t vector)
+/* DeviceState to VirtIOS390Device. Note: used on datapath,
+ * be careful and test performance if you change this.
+ */
+static inline VirtIOS390Device *to_virtio_s390_device_fast(DeviceState *d)
+{
+    return container_of(d, VirtIOS390Device, qdev);
+}
+
+/* DeviceState to VirtIOS390Device. TODO: use QOM. */
+static inline VirtIOS390Device *to_virtio_s390_device(DeviceState *d)
+{
+    return container_of(d, VirtIOS390Device, qdev);
+}
+
+static void virtio_s390_notify(DeviceState *d, uint16_t vector)
 {
-    VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
+    VirtIOS390Device *dev = to_virtio_s390_device_fast(d);
     uint64_t token = s390_virtio_device_vq_token(dev, vector);
     S390CPU *cpu = s390_cpu_addr2state(0);
     CPUS390XState *env = &cpu->env;
@@ -373,9 +387,9 @@ static void virtio_s390_notify(void *opaque, uint16_t vector)
     s390_virtio_irq(env, 0, token);
 }
 
-static unsigned virtio_s390_get_features(void *opaque)
+static unsigned virtio_s390_get_features(DeviceState *d)
 {
-    VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
+    VirtIOS390Device *dev = to_virtio_s390_device(d);
     return dev->host_features;
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index d2d2454..1f1a285 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -96,35 +96,48 @@
 bool virtio_is_big_endian(void);
 
 /* virtio device */
+/* DeviceState to VirtIOPCIProxy. For use off data-path. TODO: use QOM. */
+static inline VirtIOPCIProxy *to_virtio_pci_proxy(DeviceState *d)
+{
+    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+}
 
-static void virtio_pci_notify(void *opaque, uint16_t vector)
+/* DeviceState to VirtIOPCIProxy. Note: used on datapath,
+ * be careful and test performance if you change this.
+ */
+static inline VirtIOPCIProxy *to_virtio_pci_proxy_fast(DeviceState *d)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    return container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+}
+
+static void virtio_pci_notify(DeviceState *d, uint16_t vector)
+{
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);
     if (msix_enabled(&proxy->pci_dev))
         msix_notify(&proxy->pci_dev, vector);
     else
         qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
-static void virtio_pci_save_config(void * opaque, QEMUFile *f)
+static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     pci_device_save(&proxy->pci_dev, f);
     msix_save(&proxy->pci_dev, f);
     if (msix_present(&proxy->pci_dev))
         qemu_put_be16(f, proxy->vdev->config_vector);
 }
 
-static void virtio_pci_save_queue(void * opaque, int n, QEMUFile *f)
+static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     if (msix_present(&proxy->pci_dev))
         qemu_put_be16(f, virtio_queue_vector(proxy->vdev, n));
 }
 
-static int virtio_pci_load_config(void * opaque, QEMUFile *f)
+static int virtio_pci_load_config(DeviceState *d, QEMUFile *f)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     int ret;
     ret = pci_device_load(&proxy->pci_dev, f);
     if (ret) {
@@ -143,9 +156,9 @@ static int virtio_pci_load_config(void * opaque, QEMUFile *f)
     return 0;
 }
 
-static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
+static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     uint16_t vector;
     if (msix_present(&proxy->pci_dev)) {
         qemu_get_be16s(f, &vector);
@@ -243,7 +256,7 @@ static void virtio_pci_stop_ioeventfd(VirtIOPCIProxy *proxy)
 
 void virtio_pci_reset(DeviceState *d)
 {
-    VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     virtio_pci_stop_ioeventfd(proxy);
     virtio_reset(proxy->vdev);
     msix_unuse_all_vectors(&proxy->pci_dev);
@@ -463,9 +476,9 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
     }
 }
 
-static unsigned virtio_pci_get_features(void *opaque)
+static unsigned virtio_pci_get_features(DeviceState *d)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     return proxy->host_features;
 }
 
@@ -567,9 +580,9 @@ static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
     }
 }
 
-static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
+static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
     EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 
@@ -587,15 +600,15 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
     return 0;
 }
 
-static bool virtio_pci_query_guest_notifiers(void *opaque)
+static bool virtio_pci_query_guest_notifiers(DeviceState *d)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     return msix_enabled(&proxy->pci_dev);
 }
 
-static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
+static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
     VirtIODevice *vdev = proxy->vdev;
     int r, n;
 
@@ -611,7 +624,7 @@ static int virtio_pci_set_guest_notifiers(void *opaque, bool assign)
             break;
         }
 
-        r = virtio_pci_set_guest_notifier(opaque, n, assign);
+        r = virtio_pci_set_guest_notifier(d, n, assign);
         if (r < 0) {
             goto assign_error;
         }
@@ -636,14 +649,14 @@ assign_error:
     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
     assert(assign);
     while (--n >= 0) {
-        virtio_pci_set_guest_notifier(opaque, n, !assign);
+        virtio_pci_set_guest_notifier(d, n, !assign);
     }
     return r;
 }
 
-static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
+static int virtio_pci_set_host_notifier(DeviceState *d, int n, bool assign)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 
     /* Stop using ioeventfd for virtqueue kick if the device starts using host
      * notifiers.  This makes it easy to avoid stepping on each others' toes.
@@ -659,9 +672,9 @@ static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
     return virtio_pci_set_host_notifier_internal(proxy, n, assign, false);
 }
 
-static void virtio_pci_vmstate_change(void *opaque, bool running)
+static void virtio_pci_vmstate_change(DeviceState *d, bool running)
 {
-    VirtIOPCIProxy *proxy = opaque;
+    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 
     if (running) {
         /* Try to find out if the guest has bus master disabled, but is
@@ -726,7 +739,7 @@ void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev)
         proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
     }
 
-    virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
+    virtio_bind_device(vdev, &virtio_pci_bindings, DEVICE(proxy));
     proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
     proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
     proxy->host_features = vdev->get_features(vdev, proxy->host_features);
diff --git a/hw/virtio.c b/hw/virtio.c
index 0455a9e..77b53a9 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -935,7 +935,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
 }
 
 void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
-                        void *opaque)
+                        DeviceState *opaque)
 {
     vdev->binding = binding;
     vdev->binding_opaque = opaque;
diff --git a/hw/virtio.h b/hw/virtio.h
index 5416004..1dec9dc 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -91,17 +91,17 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*notify)(void * opaque, uint16_t vector);
-    void (*save_config)(void * opaque, QEMUFile *f);
-    void (*save_queue)(void * opaque, int n, QEMUFile *f);
-    int (*load_config)(void * opaque, QEMUFile *f);
-    int (*load_queue)(void * opaque, int n, QEMUFile *f);
-    int (*load_done)(void * opaque, QEMUFile *f);
-    unsigned (*get_features)(void * opaque);
-    bool (*query_guest_notifiers)(void * opaque);
-    int (*set_guest_notifiers)(void * opaque, bool assigned);
-    int (*set_host_notifier)(void * opaque, int n, bool assigned);
-    void (*vmstate_change)(void * opaque, bool running);
+    void (*notify)(DeviceState *d, uint16_t vector);
+    void (*save_config)(DeviceState *d, QEMUFile *f);
+    void (*save_queue)(DeviceState *d, int n, QEMUFile *f);
+    int (*load_config)(DeviceState *d, QEMUFile *f);
+    int (*load_queue)(DeviceState *d, int n, QEMUFile *f);
+    int (*load_done)(DeviceState *d, QEMUFile *f);
+    unsigned (*get_features)(DeviceState *d);
+    bool (*query_guest_notifiers)(DeviceState *d);
+    int (*set_guest_notifiers)(DeviceState *d, bool assigned);
+    int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
+    void (*vmstate_change)(DeviceState *d, bool running);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 64
@@ -128,7 +128,7 @@ struct VirtIODevice
     void (*set_status)(VirtIODevice *vdev, uint8_t val);
     VirtQueue *vq;
     const VirtIOBindings *binding;
-    void *binding_opaque;
+    DeviceState *binding_opaque;
     uint16_t device_id;
     bool vm_running;
     VMChangeStateEntry *vmstate;
@@ -191,7 +191,7 @@ void virtio_update_irq(VirtIODevice *vdev);
 int virtio_set_features(VirtIODevice *vdev, uint32_t val);
 
 void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
-                        void *opaque);
+                        DeviceState *opaque);
 
 /* Base devices.  */
 typedef struct VirtIOBlkConf VirtIOBlkConf;
commit 15faf946f7a17a5fab0d05a2312d43249d81af3c
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Thu Dec 20 08:19:16 2012 +0100

    Update seabios to a810e4e72a0d42c7bc04eda57382f8e019add901
    
    git shortlog:
    
    Kevin O'Connor (6):
          floppy: Minor - reduce handle_0e code size when CONFIG_FLOPPY is disabled.
          vga: Minor comment spelling fix.
          Don't recursively evaluate CFLAGS variables.
          Don't use gcc's -combine option.
          Add compile checking phase to build.
          acpi: Use prt_slot() macro to describe irq pins of first PCI device.
    
    Laszlo Ersek (1):
          maininit(): print machine UUID under seabios version message
    
    Paolo Bonzini (1):
          acpi: reintroduce LNKS
    
    Paolo's patch fixes the FreeBSD boot failure.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/pc-bios/acpi-dsdt.aml b/pc-bios/acpi-dsdt.aml
index 18b4dc1..00224ea 100644
Binary files a/pc-bios/acpi-dsdt.aml and b/pc-bios/acpi-dsdt.aml differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index 3eefff4..3910875 100644
Binary files a/pc-bios/bios.bin and b/pc-bios/bios.bin differ
diff --git a/pc-bios/q35-acpi-dsdt.aml b/pc-bios/q35-acpi-dsdt.aml
index 8a50559..e50641c 100644
Binary files a/pc-bios/q35-acpi-dsdt.aml and b/pc-bios/q35-acpi-dsdt.aml differ
diff --git a/roms/seabios b/roms/seabios
index e8a76b0..a810e4e 160000
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@
-Subproject commit e8a76b0f225bba5ba9d63ab227e0a37b3beb1059
+Subproject commit a810e4e72a0d42c7bc04eda57382f8e019add901
commit 62e0c095450f6a7eb37914991f3f7966aa4da7a1
Author: Andreas Färber <afaerber at suse.de>
Date:   Sun Dec 9 20:15:31 2012 +0100

    MAINTAINERS: Include X86CPU in CPU maintenance area
    
    Document that the x86 CPU refactorings are going through the qom-cpu
    tree. This does not contradict the established practice that patches
    adding KVM features to the x86 CPU go through the KVM maintainers,
    it merely takes it out of target-i386 TCG's Odd Fixes status.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Cc: Marcello Tosatti <mtosatti at redhat.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 2ede20d..61d5a4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -599,6 +599,7 @@ M: Andreas Färber <afaerber at suse.de>
 S: Supported
 F: qom/cpu.c
 F: include/qemu/cpu.h
+F: target-i386/cpu.c
 
 Device Tree
 M: Peter Crosthwaite <peter.crosthwaite at petalogix.com>
commit f7575c96c6058763fe3bd8dd26f3d09473f2df36
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 06:18:14 2012 +0100

    cpu: Move kvm_run into CPUState
    
    Pass CPUState / {X86,S390}CPU to helper functions.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/cpu-defs.h b/cpu-defs.h
index 9ed54cf..caea72b 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -134,7 +134,6 @@ typedef struct icount_decr_u16 {
 } icount_decr_u16;
 #endif
 
-struct kvm_run;
 struct qemu_work_item;
 
 typedef struct CPUBreakpoint {
@@ -203,7 +202,6 @@ typedef struct CPUWatchpoint {
     /* user data */                                                     \
     void *opaque;                                                       \
                                                                         \
-    const char *cpu_model_str;                                          \
-    struct kvm_run *kvm_run;
+    const char *cpu_model_str;
 
 #endif
diff --git a/include/qemu/cpu.h b/include/qemu/cpu.h
index 03962ef..fa3ffdb 100644
--- a/include/qemu/cpu.h
+++ b/include/qemu/cpu.h
@@ -53,6 +53,7 @@ typedef struct CPUClass {
 } CPUClass;
 
 struct KVMState;
+struct kvm_run;
 
 /**
  * CPUState:
@@ -85,6 +86,7 @@ struct CPUState {
     bool kvm_vcpu_dirty;
 #endif
     struct KVMState *kvm_state;
+    struct kvm_run *kvm_run;
 
     /* TODO Move common fields from CPUArchState here. */
 };
diff --git a/kvm-all.c b/kvm-all.c
index ba139ab..f687229 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -240,9 +240,9 @@ int kvm_init_vcpu(CPUArchState *env)
         goto err;
     }
 
-    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                         cpu->kvm_fd, 0);
-    if (env->kvm_run == MAP_FAILED) {
+    if (cpu->kvm_run == MAP_FAILED) {
         ret = -errno;
         DPRINTF("mmap'ing vcpu state failed\n");
         goto err;
@@ -250,7 +250,7 @@ int kvm_init_vcpu(CPUArchState *env)
 
     if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
         s->coalesced_mmio_ring =
-            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
+            (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
     }
 
     ret = kvm_arch_init_vcpu(cpu);
@@ -1529,7 +1529,7 @@ void kvm_cpu_synchronize_post_init(CPUArchState *env)
 int kvm_cpu_exec(CPUArchState *env)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    struct kvm_run *run = env->kvm_run;
+    struct kvm_run *run = cpu->kvm_run;
     int ret, run_ret;
 
     DPRINTF("kvm_cpu_exec()\n");
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ff5c9cd..53096c9 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1859,9 +1859,11 @@ static int kvm_handle_halt(X86CPU *cpu)
     return 0;
 }
 
-static int kvm_handle_tpr_access(CPUX86State *env)
+static int kvm_handle_tpr_access(X86CPU *cpu)
 {
-    struct kvm_run *run = env->kvm_run;
+    CPUX86State *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+    struct kvm_run *run = cs->kvm_run;
 
     apic_handle_tpr_access_report(env->apic_state, run->tpr_access.rip,
                                   run->tpr_access.is_write ? TPR_ACCESS_WRITE
@@ -2067,7 +2069,6 @@ static bool host_supports_vmx(void)
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
     uint64_t code;
     int ret;
 
@@ -2080,7 +2081,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
     case KVM_EXIT_TPR_ACCESS:
-        ret = kvm_handle_tpr_access(env);
+        ret = kvm_handle_tpr_access(cpu);
         break;
     case KVM_EXIT_FAIL_ENTRY:
         code = run->fail_entry.hardware_entry_failure_reason;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 4e34304..0b64092 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -98,13 +98,13 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     int i;
 
     /* always save the PSW  and the GPRS*/
-    env->kvm_run->psw_addr = env->psw.addr;
-    env->kvm_run->psw_mask = env->psw.mask;
+    cs->kvm_run->psw_addr = env->psw.addr;
+    cs->kvm_run->psw_mask = env->psw.mask;
 
-    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
         for (i = 0; i < 16; i++) {
-            env->kvm_run->s.regs.gprs[i] = env->regs[i];
-            env->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+            cs->kvm_run->s.regs.gprs[i] = env->regs[i];
+            cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS;
         }
     } else {
         for (i = 0; i < 16; i++) {
@@ -122,14 +122,14 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     }
 
     if (cap_sync_regs &&
-        env->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
-        env->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+        cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
+        cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
         for (i = 0; i < 16; i++) {
-            env->kvm_run->s.regs.acrs[i] = env->aregs[i];
-            env->kvm_run->s.regs.crs[i] = env->cregs[i];
+            cs->kvm_run->s.regs.acrs[i] = env->aregs[i];
+            cs->kvm_run->s.regs.crs[i] = env->cregs[i];
         }
-        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS;
-        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_CRS;
+        cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS;
+        cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_CRS;
     } else {
         for (i = 0; i < 16; i++) {
             sregs.acrs[i] = env->aregs[i];
@@ -142,9 +142,9 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     }
 
     /* Finally the prefix */
-    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
-        env->kvm_run->s.regs.prefix = env->psa;
-        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
+    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+        cs->kvm_run->s.regs.prefix = env->psa;
+        cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
     } else {
         /* prefix is only supported via sync regs */
     }
@@ -161,13 +161,13 @@ int kvm_arch_get_registers(CPUState *cs)
     int i;
 
     /* get the PSW */
-    env->psw.addr = env->kvm_run->psw_addr;
-    env->psw.mask = env->kvm_run->psw_mask;
+    env->psw.addr = cs->kvm_run->psw_addr;
+    env->psw.mask = cs->kvm_run->psw_mask;
 
     /* the GPRS */
-    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
         for (i = 0; i < 16; i++) {
-            env->regs[i] = env->kvm_run->s.regs.gprs[i];
+            env->regs[i] = cs->kvm_run->s.regs.gprs[i];
         }
     } else {
         ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
@@ -181,11 +181,11 @@ int kvm_arch_get_registers(CPUState *cs)
 
     /* The ACRS and CRS */
     if (cap_sync_regs &&
-        env->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
-        env->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+        cs->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
+        cs->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
         for (i = 0; i < 16; i++) {
-            env->aregs[i] = env->kvm_run->s.regs.acrs[i];
-            env->cregs[i] = env->kvm_run->s.regs.crs[i];
+            env->aregs[i] = cs->kvm_run->s.regs.acrs[i];
+            env->cregs[i] = cs->kvm_run->s.regs.crs[i];
         }
     } else {
         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
@@ -199,8 +199,8 @@ int kvm_arch_get_registers(CPUState *cs)
     }
 
     /* Finally the prefix */
-    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
-        env->psa = env->kvm_run->s.regs.prefix;
+    if (cap_sync_regs && cs->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+        env->psa = cs->kvm_run->s.regs.prefix;
     } else {
         /* no prefix without sync regs */
     }
@@ -331,10 +331,13 @@ static void enter_pgmcheck(S390CPU *cpu, uint16_t code)
     kvm_s390_interrupt(cpu, KVM_S390_PROGRAM_INT, code);
 }
 
-static inline void setcc(CPUS390XState *env, uint64_t cc)
+static inline void setcc(S390CPU *cpu, uint64_t cc)
 {
-    env->kvm_run->psw_mask &= ~(3ull << 44);
-    env->kvm_run->psw_mask |= (cc & 3) << 44;
+    CPUS390XState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+
+    cs->kvm_run->psw_mask &= ~(3ull << 44);
+    cs->kvm_run->psw_mask |= (cc & 3) << 44;
 
     env->psw.mask &= ~(3ul << 44);
     env->psw.mask |= (cc & 3) << 44;
@@ -356,7 +359,7 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run,
     if (r < 0) {
         enter_pgmcheck(cpu, -r);
     }
-    setcc(env, r);
+    setcc(cpu, r);
 
     return 0;
 }
@@ -446,8 +449,9 @@ static int s390_cpu_initial_reset(S390CPU *cpu)
     return 0;
 }
 
-static int handle_sigp(CPUS390XState *env, struct kvm_run *run, uint8_t ipa1)
+static int handle_sigp(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
+    CPUS390XState *env = &cpu->env;
     uint8_t order_code;
     uint32_t parameter;
     uint16_t cpu_addr;
@@ -499,7 +503,7 @@ static int handle_sigp(CPUS390XState *env, struct kvm_run *run, uint8_t ipa1)
     }
 
 out:
-    setcc(env, r ? 3 : 0);
+    setcc(cpu, r ? 3 : 0);
     return 0;
 }
 
@@ -520,7 +524,7 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
             r = handle_diag(env, run, ipb_code);
             break;
         case IPA0_SIGP:
-            r = handle_sigp(env, run, ipa1);
+            r = handle_sigp(cpu, run, ipa1);
             break;
     }
 
@@ -530,28 +534,29 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
     return 0;
 }
 
-static bool is_special_wait_psw(CPUS390XState *env)
+static bool is_special_wait_psw(CPUState *cs)
 {
     /* signal quiesce */
-    return env->kvm_run->psw_addr == 0xfffUL;
+    return cs->kvm_run->psw_addr == 0xfffUL;
 }
 
 static int handle_intercept(S390CPU *cpu)
 {
     CPUS390XState *env = &cpu->env;
-    struct kvm_run *run = env->kvm_run;
+    CPUState *cs = CPU(cpu);
+    struct kvm_run *run = cs->kvm_run;
     int icpt_code = run->s390_sieic.icptcode;
     int r = 0;
 
     dprintf("intercept: 0x%x (at 0x%lx)\n", icpt_code,
-            (long)env->kvm_run->psw_addr);
+            (long)cs->kvm_run->psw_addr);
     switch (icpt_code) {
         case ICPT_INSTRUCTION:
             r = handle_instruction(cpu, run);
             break;
         case ICPT_WAITPSW:
             if (s390_del_running_cpu(env) == 0 &&
-                is_special_wait_psw(env)) {
+                is_special_wait_psw(cs)) {
                 qemu_system_shutdown_request();
             }
             r = EXCP_HALTED;
commit a60f24b56b07f46453424263b276b0879c25c4e6
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 05:35:08 2012 +0100

    cpu: Move kvm_state field into CPUState
    
    Adapt some functions to take CPUState / {PowerPC,S390}CPU argument.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/cpu-defs.h b/cpu-defs.h
index a382e35..9ed54cf 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -135,7 +135,6 @@ typedef struct icount_decr_u16 {
 #endif
 
 struct kvm_run;
-struct KVMState;
 struct qemu_work_item;
 
 typedef struct CPUBreakpoint {
@@ -205,7 +204,6 @@ typedef struct CPUWatchpoint {
     void *opaque;                                                       \
                                                                         \
     const char *cpu_model_str;                                          \
-    struct KVMState *kvm_state;                                         \
     struct kvm_run *kvm_run;
 
 #endif
diff --git a/include/qemu/cpu.h b/include/qemu/cpu.h
index b8f8dd1..03962ef 100644
--- a/include/qemu/cpu.h
+++ b/include/qemu/cpu.h
@@ -52,6 +52,8 @@ typedef struct CPUClass {
     void (*reset)(CPUState *cpu);
 } CPUClass;
 
+struct KVMState;
+
 /**
  * CPUState:
  * @created: Indicates whether the CPU thread has been successfully created.
@@ -82,6 +84,7 @@ struct CPUState {
     int kvm_fd;
     bool kvm_vcpu_dirty;
 #endif
+    struct KVMState *kvm_state;
 
     /* TODO Move common fields from CPUArchState here. */
 };
diff --git a/kvm-all.c b/kvm-all.c
index 5f1d1fe..ba139ab 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -230,7 +230,7 @@ int kvm_init_vcpu(CPUArchState *env)
     }
 
     cpu->kvm_fd = ret;
-    env->kvm_state = s;
+    cpu->kvm_state = s;
     cpu->kvm_vcpu_dirty = true;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
@@ -1763,12 +1763,12 @@ void kvm_setup_guest_memory(void *start, size_t size)
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                  target_ulong pc)
 {
     struct kvm_sw_breakpoint *bp;
 
-    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
+    QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
         if (bp->pc == pc) {
             return bp;
         }
@@ -1776,23 +1776,23 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
     return NULL;
 }
 
-int kvm_sw_breakpoints_active(CPUArchState *env)
+int kvm_sw_breakpoints_active(CPUState *cpu)
 {
-    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
+    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
 }
 
 struct kvm_set_guest_debug_data {
     struct kvm_guest_debug dbg;
-    CPUArchState *env;
+    CPUState *cpu;
     int err;
 };
 
 static void kvm_invoke_set_guest_debug(void *data)
 {
     struct kvm_set_guest_debug_data *dbg_data = data;
-    CPUState *cpu = ENV_GET_CPU(dbg_data->env);
 
-    dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
+    dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG,
+                                   &dbg_data->dbg);
 }
 
 int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
@@ -1806,7 +1806,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
         data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
     }
     kvm_arch_update_guest_debug(cpu, &data.dbg);
-    data.env = env;
+    data.cpu = cpu;
 
     run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
     return data.err;
@@ -1821,7 +1821,7 @@ int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
     int err;
 
     if (type == GDB_BREAKPOINT_SW) {
-        bp = kvm_find_sw_breakpoint(current_env, addr);
+        bp = kvm_find_sw_breakpoint(current_cpu, addr);
         if (bp) {
             bp->use_count++;
             return 0;
@@ -1840,7 +1840,7 @@ int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
             return err;
         }
 
-        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
+        QTAILQ_INSERT_HEAD(&current_cpu->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
     } else {
         err = kvm_arch_insert_hw_breakpoint(addr, len, type);
@@ -1867,7 +1867,7 @@ int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
     int err;
 
     if (type == GDB_BREAKPOINT_SW) {
-        bp = kvm_find_sw_breakpoint(current_env, addr);
+        bp = kvm_find_sw_breakpoint(current_cpu, addr);
         if (!bp) {
             return -ENOENT;
         }
@@ -1882,7 +1882,7 @@ int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
             return err;
         }
 
-        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
+        QTAILQ_REMOVE(&current_cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
         g_free(bp);
     } else {
         err = kvm_arch_remove_hw_breakpoint(addr, len, type);
@@ -1904,7 +1904,7 @@ void kvm_remove_all_breakpoints(CPUArchState *current_env)
 {
     CPUState *current_cpu = ENV_GET_CPU(current_env);
     struct kvm_sw_breakpoint *bp, *next;
-    KVMState *s = current_env->kvm_state;
+    KVMState *s = current_cpu->kvm_state;
     CPUArchState *env;
     CPUState *cpu;
 
diff --git a/kvm.h b/kvm.h
index a2375ff..6ddcdc5 100644
--- a/kvm.h
+++ b/kvm.h
@@ -207,10 +207,10 @@ struct kvm_sw_breakpoint {
 
 QTAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint);
 
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                  target_ulong pc);
 
-int kvm_sw_breakpoints_active(CPUArchState *env);
+int kvm_sw_breakpoints_active(CPUState *cpu);
 
 int kvm_arch_insert_sw_breakpoint(CPUState *current_cpu,
                                   struct kvm_sw_breakpoint *bp);
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 7be3ad8..04a90c5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1636,6 +1636,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                    uint32_t *eax, uint32_t *ebx,
                    uint32_t *ecx, uint32_t *edx)
 {
+    X86CPU *cpu = x86_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
     /* test if maximum index reached */
     if (index & 0x80000000) {
         if (index > env->cpuid_xlevel) {
@@ -1752,7 +1755,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
     case 0xA:
         /* Architectural Performance Monitoring Leaf */
         if (kvm_enabled()) {
-            KVMState *s = env->kvm_state;
+            KVMState *s = cs->kvm_state;
 
             *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
             *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX);
@@ -1775,7 +1778,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             break;
         }
         if (kvm_enabled()) {
-            KVMState *s = env->kvm_state;
+            KVMState *s = cs->kvm_state;
 
             *eax = kvm_arch_get_supported_cpuid(s, 0xd, count, R_EAX);
             *ebx = kvm_arch_get_supported_cpuid(s, 0xd, count, R_EBX);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b2efa1e..ff5c9cd 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -316,7 +316,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
     if ((env->mcg_cap & MCG_SER_P) && addr
         && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
         if (qemu_ram_addr_from_host(addr, &ram_addr) ||
-            !kvm_physical_memory_addr_from_host(env->kvm_state, addr, &paddr)) {
+            !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!\n");
             /* Hope we are lucky for AO MCE */
@@ -348,8 +348,8 @@ int kvm_arch_on_sigbus(int code, void *addr)
 
         /* Hope we are lucky for AO MCE */
         if (qemu_ram_addr_from_host(addr, &ram_addr) ||
-            !kvm_physical_memory_addr_from_host(first_cpu->kvm_state, addr,
-                                                &paddr)) {
+            !kvm_physical_memory_addr_from_host(CPU(first_cpu)->kvm_state,
+                                                addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!: %p\n", addr);
             return 0;
@@ -579,12 +579,12 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     if (((env->cpuid_version >> 8)&0xF) >= 6
         && (env->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)
-        && kvm_check_extension(env->kvm_state, KVM_CAP_MCE) > 0) {
+        && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) {
         uint64_t mcg_cap;
         int banks;
         int ret;
 
-        ret = kvm_get_mce_cap_supported(env->kvm_state, &mcg_cap, &banks);
+        ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
         if (ret < 0) {
             fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret));
             return ret;
@@ -612,7 +612,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         return r;
     }
 
-    r = kvm_check_extension(env->kvm_state, KVM_CAP_TSC_CONTROL);
+    r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL);
     if (r && env->tsc_khz) {
         r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
         if (r < 0) {
@@ -1977,9 +1977,10 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 
 static CPUWatchpoint hw_watchpoint;
 
-static int kvm_handle_debug(CPUX86State *env,
+static int kvm_handle_debug(X86CPU *cpu,
                             struct kvm_debug_exit_arch *arch_info)
 {
+    CPUX86State *env = &cpu->env;
     int ret = 0;
     int n;
 
@@ -2011,7 +2012,7 @@ static int kvm_handle_debug(CPUX86State *env,
                 }
             }
         }
-    } else if (kvm_find_sw_breakpoint(env, arch_info->pc)) {
+    } else if (kvm_find_sw_breakpoint(CPU(cpu), arch_info->pc)) {
         ret = EXCP_DEBUG;
     }
     if (ret == 0) {
@@ -2028,7 +2029,6 @@ static int kvm_handle_debug(CPUX86State *env,
 
 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
 {
-    CPUX86State *env = &X86_CPU(cpu)->env;
     const uint8_t type_code[] = {
         [GDB_BREAKPOINT_HW] = 0x0,
         [GDB_WATCHPOINT_WRITE] = 0x1,
@@ -2039,7 +2039,7 @@ void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
     };
     int n;
 
-    if (kvm_sw_breakpoints_active(env)) {
+    if (kvm_sw_breakpoints_active(cpu)) {
         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
     }
     if (nb_hw_breakpoint > 0) {
@@ -2106,7 +2106,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         break;
     case KVM_EXIT_DEBUG:
         DPRINTF("kvm_exit_debug\n");
-        ret = kvm_handle_debug(env, &run->debug.arch);
+        ret = kvm_handle_debug(cpu, &run->debug.arch);
         break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ad5bc66..eb52b76 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -140,7 +140,7 @@ static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
     int ret, i;
 
     if (!kvm_enabled() ||
-        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
+        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
         return 0;
     }
 
@@ -178,9 +178,12 @@ static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 
 
 #if defined(TARGET_PPC64)
-static void kvm_get_fallback_smmu_info(CPUPPCState *env,
+static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                        struct kvm_ppc_smmu_info *info)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+
     memset(info, 0, sizeof(*info));
 
     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
@@ -206,7 +209,7 @@ static void kvm_get_fallback_smmu_info(CPUPPCState *env,
      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
      *   this fallback.
      */
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
         /* No flags */
         info->flags = 0;
         info->slb_size = 64;
@@ -262,18 +265,19 @@ static void kvm_get_fallback_smmu_info(CPUPPCState *env,
     }
 }
 
-static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
+static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 {
+    CPUState *cs = CPU(cpu);
     int ret;
 
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
-        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
+    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
+        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
         if (ret == 0) {
             return;
         }
     }
 
-    kvm_get_fallback_smmu_info(env, info);
+    kvm_get_fallback_smmu_info(cpu, info);
 }
 
 static long getrampagesize(void)
@@ -316,10 +320,11 @@ static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
     return (1ul << shift) <= rampgsize;
 }
 
-static void kvm_fixup_page_sizes(CPUPPCState *env)
+static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 {
     static struct kvm_ppc_smmu_info smmu_info;
     static bool has_smmu_info;
+    CPUPPCState *env = &cpu->env;
     long rampagesize;
     int iq, ik, jq, jk;
 
@@ -330,7 +335,7 @@ static void kvm_fixup_page_sizes(CPUPPCState *env)
 
     /* Collect MMU info from kernel if not already */
     if (!has_smmu_info) {
-        kvm_get_smmu_info(env, &smmu_info);
+        kvm_get_smmu_info(cpu, &smmu_info);
         has_smmu_info = true;
     }
 
@@ -373,7 +378,7 @@ static void kvm_fixup_page_sizes(CPUPPCState *env)
 }
 #else /* defined (TARGET_PPC64) */
 
-static inline void kvm_fixup_page_sizes(CPUPPCState *env)
+static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 {
 }
 
@@ -386,7 +391,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     int ret;
 
     /* Gather server mmu info from KVM and update the CPU state */
-    kvm_fixup_page_sizes(cenv);
+    kvm_fixup_page_sizes(cpu);
 
     /* Synchronize sregs with kvm */
     ret = kvm_arch_sync_sregs(cpu);
@@ -986,12 +991,14 @@ uint32_t kvmppc_get_dfp(void)
 
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
     uint32_t *hc = (uint32_t*)buf;
 
     struct kvm_ppc_pvinfo pvinfo;
 
-    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
-        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
+    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
+        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
         memcpy(buf, pvinfo.hcall, buf_len);
 
         return 0;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 5422678..4e34304 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -291,12 +291,11 @@ int kvm_arch_process_async_events(CPUState *cs)
 void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
                                  uint64_t parm64, int vm)
 {
-    CPUS390XState *env = &cpu->env;
     CPUState *cs = CPU(cpu);
     struct kvm_s390_interrupt kvmint;
     int r;
 
-    if (!env->kvm_state) {
+    if (!cs->kvm_state) {
         return;
     }
 
@@ -305,7 +304,7 @@ void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
     kvmint.parm64 = parm64;
 
     if (vm) {
-        r = kvm_vm_ioctl(env->kvm_state, KVM_S390_INTERRUPT, &kvmint);
+        r = kvm_vm_ioctl(cs->kvm_state, KVM_S390_INTERRUPT, &kvmint);
     } else {
         r = kvm_vcpu_ioctl(cs, KVM_S390_INTERRUPT, &kvmint);
     }
commit a34a92b9ecd8d25bd1de9df601ed31ccd8ebcae7
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 04:43:18 2012 +0100

    ppc_booke: Pass PowerPCCPU to ppc_booke_timers_init()
    
    Cleans up after passing PowerPCCPU to timer callbacks.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc.h b/hw/ppc.h
index 64463ad..ec33f9c 100644
--- a/hw/ppc.h
+++ b/hw/ppc.h
@@ -89,4 +89,4 @@ enum {
 #define PPC_SERIAL_MM_BAUDBASE 399193
 
 /* ppc_booke.c */
-void ppc_booke_timers_init(CPUPPCState *env, uint32_t freq, uint32_t flags);
+void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags);
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index af6b671..5592359 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -495,7 +495,7 @@ void ppce500_init(PPCE500Params *params)
         env->mpic_cpu_base = MPC8544_CCSRBAR_BASE +
                               MPC8544_MPIC_REGS_OFFSET + 0x20000;
 
-        ppc_booke_timers_init(env, 400000000, PPC_TIMER_E500);
+        ppc_booke_timers_init(cpu, 400000000, PPC_TIMER_E500);
 
         /* Register reset handler */
         if (!i) {
diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c
index 373b8f3..fe71784 100644
--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@@ -2482,7 +2482,7 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
     /* OBP arbitrer */
     ppc4xx_opba_init(0xef600600);
     /* Initialize timers */
-    ppc_booke_timers_init(env, sysclk, 0);
+    ppc_booke_timers_init(cpu, sysclk, 0);
     /* Universal interrupt controller */
     irqs = g_malloc0(sizeof(qemu_irq) * PPCUIC_OUTPUT_NB);
     irqs[PPCUIC_OUTPUT_INT] =
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index cc85607..d9f0f81 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -195,7 +195,7 @@ static void bamboo_init(QEMUMachineInitArgs *args)
     env = &cpu->env;
 
     qemu_register_reset(main_cpu_reset, cpu);
-    ppc_booke_timers_init(env, 400000000, 0);
+    ppc_booke_timers_init(cpu, 400000000, 0);
     ppc_dcr_init(env, NULL, NULL);
 
     /* interrupt controller */
diff --git a/hw/ppc_booke.c b/hw/ppc_booke.c
index 5748063..f2c0409 100644
--- a/hw/ppc_booke.c
+++ b/hw/ppc_booke.c
@@ -237,16 +237,15 @@ void store_booke_tcr(CPUPPCState *env, target_ulong val)
 
 }
 
-void ppc_booke_timers_init(CPUPPCState *env, uint32_t freq, uint32_t flags)
+void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
     tb_env      = g_malloc0(sizeof(ppc_tb_t));
     booke_timer = g_malloc0(sizeof(booke_timer_t));
 
-    env->tb_env = tb_env;
+    cpu->env.tb_env = tb_env;
     tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED;
 
     tb_env->tb_freq    = freq;
diff --git a/hw/virtex_ml507.c b/hw/virtex_ml507.c
index 6ab8fee..1a19cd2 100644
--- a/hw/virtex_ml507.c
+++ b/hw/virtex_ml507.c
@@ -94,7 +94,7 @@ static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
     }
     env = &cpu->env;
 
-    ppc_booke_timers_init(env, sysclk, 0/* no flags */);
+    ppc_booke_timers_init(cpu, sysclk, 0/* no flags */);
 
     ppc_dcr_init(env, NULL, NULL);
 
commit 2f9859fb49cb3c6ec876bc0bf709f28afcdd2384
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 04:47:33 2012 +0100

    ppc4xx_devs: Return PowerPCCPU from ppc4xx_init()
    
    Prepares for passing PowerPCCPU to ppc_booke_timers_init().
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c
index 0f458ef..373b8f3 100644
--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@@ -2111,12 +2111,14 @@ CPUPPCState *ppc405cr_init(MemoryRegion *address_space_mem,
 {
     clk_setup_t clk_setup[PPC405CR_CLK_NB];
     qemu_irq dma_irqs[4];
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     qemu_irq *pic, *irqs;
 
     memset(clk_setup, 0, sizeof(clk_setup));
-    env = ppc4xx_init("405cr", &clk_setup[PPC405CR_CPU_CLK],
+    cpu = ppc4xx_init("405cr", &clk_setup[PPC405CR_CPU_CLK],
                       &clk_setup[PPC405CR_TMR_CLK], sysclk);
+    env = &cpu->env;
     /* Memory mapped devices registers */
     /* PLB arbitrer */
     ppc4xx_plb_init(env);
@@ -2460,13 +2462,15 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
 {
     clk_setup_t clk_setup[PPC405EP_CLK_NB], tlb_clk_setup;
     qemu_irq dma_irqs[4], gpt_irqs[5], mal_irqs[4];
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     qemu_irq *pic, *irqs;
 
     memset(clk_setup, 0, sizeof(clk_setup));
     /* init CPUs */
-    env = ppc4xx_init("405ep", &clk_setup[PPC405EP_CPU_CLK],
+    cpu = ppc4xx_init("405ep", &clk_setup[PPC405EP_CPU_CLK],
                       &tlb_clk_setup, sysclk);
+    env = &cpu->env;
     clk_setup[PPC405EP_CPU_CLK].cb = tlb_clk_setup.cb;
     clk_setup[PPC405EP_CPU_CLK].opaque = tlb_clk_setup.opaque;
     /* Internal devices init */
diff --git a/hw/ppc4xx.h b/hw/ppc4xx.h
index d795ced..5d891ae 100644
--- a/hw/ppc4xx.h
+++ b/hw/ppc4xx.h
@@ -28,9 +28,9 @@
 #include "pci.h"
 
 /* PowerPC 4xx core initialization */
-CPUPPCState *ppc4xx_init (const char *cpu_model,
-                       clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
-                       uint32_t sysclk);
+PowerPCCPU *ppc4xx_init(const char *cpu_model,
+                        clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
+                        uint32_t sysclk);
 
 /* PowerPC 4xx universal interrupt controller */
 enum {
diff --git a/hw/ppc4xx_devs.c b/hw/ppc4xx_devs.c
index bac8d87..761cb59 100644
--- a/hw/ppc4xx_devs.c
+++ b/hw/ppc4xx_devs.c
@@ -47,9 +47,9 @@ static void ppc4xx_reset(void *opaque)
 
 /*****************************************************************************/
 /* Generic PowerPC 4xx processor instantiation */
-CPUPPCState *ppc4xx_init (const char *cpu_model,
-                       clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
-                       uint32_t sysclk)
+PowerPCCPU *ppc4xx_init(const char *cpu_model,
+                        clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
+                        uint32_t sysclk)
 {
     PowerPCCPU *cpu;
     CPUPPCState *env;
@@ -72,7 +72,7 @@ CPUPPCState *ppc4xx_init (const char *cpu_model,
     /* Register qemu callbacks */
     qemu_register_reset(ppc4xx_reset, cpu);
 
-    return env;
+    return cpu;
 }
 
 /*****************************************************************************/
commit ee0c98e650da0ce1e4e17dc1e2bbb946cde2c45c
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 04:35:15 2012 +0100

    ppc_booke: Pass PowerPCCPU to {decr,fit,wdt} timer callbacks
    
    Cleans up after passing PowerPCCPU to booke_update_irq().
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc_booke.c b/hw/ppc_booke.c
index da6bc4a..5748063 100644
--- a/hw/ppc_booke.c
+++ b/hw/ppc_booke.c
@@ -155,8 +155,8 @@ static void booke_update_fixed_timer(CPUPPCState         *env,
 
 static void booke_decr_cb(void *opaque)
 {
-    CPUPPCState *env = opaque;
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
 
     env->spr[SPR_BOOKE_TSR] |= TSR_DIS;
     booke_update_irq(cpu);
@@ -169,13 +169,11 @@ static void booke_decr_cb(void *opaque)
 
 static void booke_fit_cb(void *opaque)
 {
-    PowerPCCPU *cpu;
-    CPUPPCState *env;
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
-    env = opaque;
-    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     booke_timer = tb_env->opaque;
     env->spr[SPR_BOOKE_TSR] |= TSR_FIS;
@@ -190,13 +188,11 @@ static void booke_fit_cb(void *opaque)
 
 static void booke_wdt_cb(void *opaque)
 {
-    PowerPCCPU *cpu;
-    CPUPPCState *env;
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
-    env = opaque;
-    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     booke_timer = tb_env->opaque;
 
@@ -243,6 +239,7 @@ void store_booke_tcr(CPUPPCState *env, target_ulong val)
 
 void ppc_booke_timers_init(CPUPPCState *env, uint32_t freq, uint32_t flags)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
@@ -255,10 +252,10 @@ void ppc_booke_timers_init(CPUPPCState *env, uint32_t freq, uint32_t flags)
     tb_env->tb_freq    = freq;
     tb_env->decr_freq  = freq;
     tb_env->opaque     = booke_timer;
-    tb_env->decr_timer = qemu_new_timer_ns(vm_clock, &booke_decr_cb, env);
+    tb_env->decr_timer = qemu_new_timer_ns(vm_clock, &booke_decr_cb, cpu);
 
     booke_timer->fit_timer =
-        qemu_new_timer_ns(vm_clock, &booke_fit_cb, env);
+        qemu_new_timer_ns(vm_clock, &booke_fit_cb, cpu);
     booke_timer->wdt_timer =
-        qemu_new_timer_ns(vm_clock, &booke_wdt_cb, env);
+        qemu_new_timer_ns(vm_clock, &booke_wdt_cb, cpu);
 }
commit 50c680f06ca81aebc91ac4a325f194b2d8396721
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 04:26:55 2012 +0100

    ppc: Pass PowerPCCPU to [h]decr timer callbacks
    
    Cleans up after passing PowerPCCPU to [h]decr exception callbacks.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc.c b/hw/ppc.c
index b1b93a1..8c05eb3 100644
--- a/hw/ppc.c
+++ b/hw/ppc.c
@@ -714,11 +714,11 @@ void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
     _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, 0);
 }
 
-static void cpu_ppc_decr_cb (void *opaque)
+static void cpu_ppc_decr_cb(void *opaque)
 {
-    CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = opaque;
 
-    _cpu_ppc_store_decr(ppc_env_get_cpu(env), 0x00000000, 0xFFFFFFFF, 1);
+    _cpu_ppc_store_decr(cpu, 0x00000000, 0xFFFFFFFF, 1);
 }
 
 static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t hdecr,
@@ -739,11 +739,11 @@ void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
     _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, 0);
 }
 
-static void cpu_ppc_hdecr_cb (void *opaque)
+static void cpu_ppc_hdecr_cb(void *opaque)
 {
-    CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = opaque;
 
-    _cpu_ppc_store_hdecr(ppc_env_get_cpu(env), 0x00000000, 0xFFFFFFFF, 1);
+    _cpu_ppc_store_hdecr(cpu, 0x00000000, 0xFFFFFFFF, 1);
 }
 
 static void cpu_ppc_store_purr(PowerPCCPU *cpu, uint64_t value)
@@ -774,17 +774,19 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
 /* Set up (once) timebase frequency (in Hz) */
 clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_tb_t *tb_env;
 
     tb_env = g_malloc0(sizeof(ppc_tb_t));
     env->tb_env = tb_env;
     tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED;
     /* Create new timer */
-    tb_env->decr_timer = qemu_new_timer_ns(vm_clock, &cpu_ppc_decr_cb, env);
+    tb_env->decr_timer = qemu_new_timer_ns(vm_clock, &cpu_ppc_decr_cb, cpu);
     if (0) {
         /* XXX: find a suitable condition to enable the hypervisor decrementer
          */
-        tb_env->hdecr_timer = qemu_new_timer_ns(vm_clock, &cpu_ppc_hdecr_cb, env);
+        tb_env->hdecr_timer = qemu_new_timer_ns(vm_clock, &cpu_ppc_hdecr_cb,
+                                                cpu);
     } else {
         tb_env->hdecr_timer = NULL;
     }
commit 7e0a924734e7bfad7568bf57fec68bfecd5c2575
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 04:18:02 2012 +0100

    ppc: Pass PowerPCCPU to [h]decr callbacks
    
    Cleans up after passing PowerPCCPU to ppc_set_irq().
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc.c b/hw/ppc.c
index 6db595f..b1b93a1 100644
--- a/hw/ppc.c
+++ b/hw/ppc.c
@@ -644,30 +644,27 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env)
 /* When decrementer expires,
  * all we need to do is generate or queue a CPU exception
  */
-static inline void cpu_ppc_decr_excp(CPUPPCState *env)
+static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
-
     /* Raise it */
     LOG_TB("raise decrementer exception\n");
     ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1);
 }
 
-static inline void cpu_ppc_hdecr_excp(CPUPPCState *env)
+static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(env);
-
     /* Raise it */
     LOG_TB("raise decrementer exception\n");
     ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1);
 }
 
-static void __cpu_ppc_store_decr (CPUPPCState *env, uint64_t *nextp,
-                                  struct QEMUTimer *timer,
-                                  void (*raise_excp)(CPUPPCState *),
-                                  uint32_t decr, uint32_t value,
-                                  int is_excp)
+static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+                                 struct QEMUTimer *timer,
+                                 void (*raise_excp)(PowerPCCPU *),
+                                 uint32_t decr, uint32_t value,
+                                 int is_excp)
 {
+    CPUPPCState *env = &cpu->env;
     ppc_tb_t *tb_env = env->tb_env;
     uint64_t now, next;
 
@@ -697,53 +694,61 @@ static void __cpu_ppc_store_decr (CPUPPCState *env, uint64_t *nextp,
     if ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED)
         && (value & 0x80000000)
         && !(decr & 0x80000000)) {
-        (*raise_excp)(env);
+        (*raise_excp)(cpu);
     }
 }
 
-static inline void _cpu_ppc_store_decr(CPUPPCState *env, uint32_t decr,
+static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, uint32_t decr,
                                        uint32_t value, int is_excp)
 {
-    ppc_tb_t *tb_env = env->tb_env;
+    ppc_tb_t *tb_env = cpu->env.tb_env;
 
-    __cpu_ppc_store_decr(env, &tb_env->decr_next, tb_env->decr_timer,
+    __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer,
                          &cpu_ppc_decr_excp, decr, value, is_excp);
 }
 
 void cpu_ppc_store_decr (CPUPPCState *env, uint32_t value)
 {
-    _cpu_ppc_store_decr(env, cpu_ppc_load_decr(env), value, 0);
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+
+    _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, 0);
 }
 
 static void cpu_ppc_decr_cb (void *opaque)
 {
-    _cpu_ppc_store_decr(opaque, 0x00000000, 0xFFFFFFFF, 1);
+    CPUPPCState *env = opaque;
+
+    _cpu_ppc_store_decr(ppc_env_get_cpu(env), 0x00000000, 0xFFFFFFFF, 1);
 }
 
-static inline void _cpu_ppc_store_hdecr(CPUPPCState *env, uint32_t hdecr,
+static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, uint32_t hdecr,
                                         uint32_t value, int is_excp)
 {
-    ppc_tb_t *tb_env = env->tb_env;
+    ppc_tb_t *tb_env = cpu->env.tb_env;
 
     if (tb_env->hdecr_timer != NULL) {
-        __cpu_ppc_store_decr(env, &tb_env->hdecr_next, tb_env->hdecr_timer,
+        __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer,
                              &cpu_ppc_hdecr_excp, hdecr, value, is_excp);
     }
 }
 
 void cpu_ppc_store_hdecr (CPUPPCState *env, uint32_t value)
 {
-    _cpu_ppc_store_hdecr(env, cpu_ppc_load_hdecr(env), value, 0);
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+
+    _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, 0);
 }
 
 static void cpu_ppc_hdecr_cb (void *opaque)
 {
-    _cpu_ppc_store_hdecr(opaque, 0x00000000, 0xFFFFFFFF, 1);
+    CPUPPCState *env = opaque;
+
+    _cpu_ppc_store_hdecr(ppc_env_get_cpu(env), 0x00000000, 0xFFFFFFFF, 1);
 }
 
-static void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
+static void cpu_ppc_store_purr(PowerPCCPU *cpu, uint64_t value)
 {
-    ppc_tb_t *tb_env = env->tb_env;
+    ppc_tb_t *tb_env = cpu->env.tb_env;
 
     tb_env->purr_load = value;
     tb_env->purr_start = qemu_get_clock_ns(vm_clock);
@@ -752,6 +757,7 @@ static void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
 static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
 {
     CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_tb_t *tb_env = env->tb_env;
 
     tb_env->tb_freq = freq;
@@ -760,9 +766,9 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
      * if a decrementer exception is pending when it enables msr_ee at startup,
      * it's not ready to handle it...
      */
-    _cpu_ppc_store_decr(env, 0xFFFFFFFF, 0xFFFFFFFF, 0);
-    _cpu_ppc_store_hdecr(env, 0xFFFFFFFF, 0xFFFFFFFF, 0);
-    cpu_ppc_store_purr(env, 0x0000000000000000ULL);
+    _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+    _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+    cpu_ppc_store_purr(cpu, 0x0000000000000000ULL);
 }
 
 /* Set up (once) timebase frequency (in Hz) */
commit 7058581a26f4299e0b7e05677c64c1b5a50d0e75
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Dec 1 03:55:58 2012 +0100

    ppc: Pass PowerPCCPU to ppc_set_irq()
    
    Adapt static caller functions.
    
    This cleans up after passing PowerPCCPU to kvmppc_set_interrupt().
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/ppc.c b/hw/ppc.c
index e99a93d..6db595f 100644
--- a/hw/ppc.c
+++ b/hw/ppc.c
@@ -50,8 +50,9 @@
 static void cpu_ppc_tb_stop (CPUPPCState *env);
 static void cpu_ppc_tb_start (CPUPPCState *env);
 
-void ppc_set_irq(CPUPPCState *env, int n_IRQ, int level)
+void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
 {
+    CPUPPCState *env = &cpu->env;
     unsigned int old_pending = env->pending_interrupts;
 
     if (level) {
@@ -65,7 +66,7 @@ void ppc_set_irq(CPUPPCState *env, int n_IRQ, int level)
 
     if (old_pending != env->pending_interrupts) {
 #ifdef CONFIG_KVM
-        kvmppc_set_interrupt(ppc_env_get_cpu(env), n_IRQ, level);
+        kvmppc_set_interrupt(cpu, n_IRQ, level);
 #endif
     }
 
@@ -100,13 +101,13 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the external IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_EXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
             break;
         case PPC6xx_INPUT_SMI:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the SMI IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_SMI, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level);
             break;
         case PPC6xx_INPUT_MCP:
             /* Negative edge sensitive */
@@ -116,7 +117,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
             if (cur_level == 1 && level == 0) {
                 LOG_IRQ("%s: raise machine check state\n",
                             __func__);
-                ppc_set_irq(env, PPC_INTERRUPT_MCK, 1);
+                ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
             }
             break;
         case PPC6xx_INPUT_CKSTP_IN:
@@ -138,7 +139,7 @@ static void ppc6xx_set_irq(void *opaque, int pin, int level)
         case PPC6xx_INPUT_SRESET:
             LOG_IRQ("%s: set the RESET IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_RESET, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
             break;
         default:
             /* Unknown pin - do nothing */
@@ -178,13 +179,13 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the external IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_EXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
             break;
         case PPC970_INPUT_THINT:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the SMI IRQ state to %d\n", __func__,
                         level);
-            ppc_set_irq(env, PPC_INTERRUPT_THERM, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level);
             break;
         case PPC970_INPUT_MCP:
             /* Negative edge sensitive */
@@ -194,7 +195,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
             if (cur_level == 1 && level == 0) {
                 LOG_IRQ("%s: raise machine check state\n",
                             __func__);
-                ppc_set_irq(env, PPC_INTERRUPT_MCK, 1);
+                ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
             }
             break;
         case PPC970_INPUT_CKSTP:
@@ -218,7 +219,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
         case PPC970_INPUT_SRESET:
             LOG_IRQ("%s: set the RESET IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_RESET, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
             break;
         case PPC970_INPUT_TBEN:
             LOG_IRQ("%s: set the TBEN state to %d\n", __func__,
@@ -259,7 +260,7 @@ static void power7_set_irq(void *opaque, int pin, int level)
         /* Level sensitive - active high */
         LOG_IRQ("%s: set the external IRQ state to %d\n",
                 __func__, level);
-        ppc_set_irq(env, PPC_INTERRUPT_EXT, level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
         break;
     default:
         /* Unknown pin - do nothing */
@@ -319,13 +320,13 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the critical IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_CEXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
             break;
         case PPC40x_INPUT_INT:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the external IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_EXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
             break;
         case PPC40x_INPUT_HALT:
             /* Level sensitive - active low */
@@ -342,7 +343,7 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the debug pin state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_DEBUG, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
             break;
         default:
             /* Unknown pin - do nothing */
@@ -387,26 +388,26 @@ static void ppce500_set_irq(void *opaque, int pin, int level)
         case PPCE500_INPUT_RESET_CORE:
             if (level) {
                 LOG_IRQ("%s: reset the PowerPC core\n", __func__);
-                ppc_set_irq(env, PPC_INTERRUPT_MCK, level);
+                ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level);
             }
             break;
         case PPCE500_INPUT_CINT:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the critical IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_CEXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
             break;
         case PPCE500_INPUT_INT:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the core IRQ state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_EXT, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
             break;
         case PPCE500_INPUT_DEBUG:
             /* Level sensitive - active high */
             LOG_IRQ("%s: set the debug pin state to %d\n",
                         __func__, level);
-            ppc_set_irq(env, PPC_INTERRUPT_DEBUG, level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
             break;
         default:
             /* Unknown pin - do nothing */
@@ -645,16 +646,20 @@ uint64_t cpu_ppc_load_purr (CPUPPCState *env)
  */
 static inline void cpu_ppc_decr_excp(CPUPPCState *env)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+
     /* Raise it */
     LOG_TB("raise decrementer exception\n");
-    ppc_set_irq(env, PPC_INTERRUPT_DECR, 1);
+    ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1);
 }
 
 static inline void cpu_ppc_hdecr_excp(CPUPPCState *env)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+
     /* Raise it */
     LOG_TB("raise decrementer exception\n");
-    ppc_set_irq(env, PPC_INTERRUPT_HDECR, 1);
+    ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1);
 }
 
 static void __cpu_ppc_store_decr (CPUPPCState *env, uint64_t *nextp,
@@ -829,12 +834,14 @@ struct ppc40x_timer_t {
 /* Fixed interval timer */
 static void cpu_4xx_fit_cb (void *opaque)
 {
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     ppc_tb_t *tb_env;
     ppc40x_timer_t *ppc40x_timer;
     uint64_t now, next;
 
     env = opaque;
+    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     ppc40x_timer = tb_env->opaque;
     now = qemu_get_clock_ns(vm_clock);
@@ -860,8 +867,9 @@ static void cpu_4xx_fit_cb (void *opaque)
         next++;
     qemu_mod_timer(ppc40x_timer->fit_timer, next);
     env->spr[SPR_40x_TSR] |= 1 << 26;
-    if ((env->spr[SPR_40x_TCR] >> 23) & 0x1)
-        ppc_set_irq(env, PPC_INTERRUPT_FIT, 1);
+    if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) {
+        ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1);
+    }
     LOG_TB("%s: ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx "\n", __func__,
            (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1),
            env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
@@ -897,16 +905,19 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
 
 static void cpu_4xx_pit_cb (void *opaque)
 {
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     ppc_tb_t *tb_env;
     ppc40x_timer_t *ppc40x_timer;
 
     env = opaque;
+    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     ppc40x_timer = tb_env->opaque;
     env->spr[SPR_40x_TSR] |= 1 << 27;
-    if ((env->spr[SPR_40x_TCR] >> 26) & 0x1)
-        ppc_set_irq(env, ppc40x_timer->decr_excp, 1);
+    if ((env->spr[SPR_40x_TCR] >> 26) & 0x1) {
+        ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1);
+    }
     start_stop_pit(env, tb_env, 1);
     LOG_TB("%s: ar %d ir %d TCR " TARGET_FMT_lx " TSR " TARGET_FMT_lx " "
            "%016" PRIx64 "\n", __func__,
@@ -919,12 +930,14 @@ static void cpu_4xx_pit_cb (void *opaque)
 /* Watchdog timer */
 static void cpu_4xx_wdt_cb (void *opaque)
 {
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     ppc_tb_t *tb_env;
     ppc40x_timer_t *ppc40x_timer;
     uint64_t now, next;
 
     env = opaque;
+    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     ppc40x_timer = tb_env->opaque;
     now = qemu_get_clock_ns(vm_clock);
@@ -961,8 +974,9 @@ static void cpu_4xx_wdt_cb (void *opaque)
         qemu_mod_timer(ppc40x_timer->wdt_timer, next);
         ppc40x_timer->wdt_next = next;
         env->spr[SPR_40x_TSR] |= 1 << 30;
-        if ((env->spr[SPR_40x_TCR] >> 27) & 0x1)
-            ppc_set_irq(env, PPC_INTERRUPT_WDT, 1);
+        if ((env->spr[SPR_40x_TCR] >> 27) & 0x1) {
+            ppc_set_irq(cpu, PPC_INTERRUPT_WDT, 1);
+        }
         break;
     case 0x3:
         env->spr[SPR_40x_TSR] &= ~0x30000000;
diff --git a/hw/ppc.h b/hw/ppc.h
index 2f3ea27..64463ad 100644
--- a/hw/ppc.h
+++ b/hw/ppc.h
@@ -1,4 +1,4 @@
-void ppc_set_irq (CPUPPCState *env, int n_IRQ, int level);
+void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level);
 
 /* PowerPC hardware exceptions management helpers */
 typedef void (*clk_setup_cb)(void *opaque, uint32_t freq);
diff --git a/hw/ppc_booke.c b/hw/ppc_booke.c
index d51e7fa..da6bc4a 100644
--- a/hw/ppc_booke.c
+++ b/hw/ppc_booke.c
@@ -71,17 +71,19 @@ struct booke_timer_t {
     uint32_t flags;
 };
 
-static void booke_update_irq(CPUPPCState *env)
+static void booke_update_irq(PowerPCCPU *cpu)
 {
-    ppc_set_irq(env, PPC_INTERRUPT_DECR,
+    CPUPPCState *env = &cpu->env;
+
+    ppc_set_irq(cpu, PPC_INTERRUPT_DECR,
                 (env->spr[SPR_BOOKE_TSR] & TSR_DIS
                  && env->spr[SPR_BOOKE_TCR] & TCR_DIE));
 
-    ppc_set_irq(env, PPC_INTERRUPT_WDT,
+    ppc_set_irq(cpu, PPC_INTERRUPT_WDT,
                 (env->spr[SPR_BOOKE_TSR] & TSR_WIS
                  && env->spr[SPR_BOOKE_TCR] & TCR_WIE));
 
-    ppc_set_irq(env, PPC_INTERRUPT_FIT,
+    ppc_set_irq(cpu, PPC_INTERRUPT_FIT,
                 (env->spr[SPR_BOOKE_TSR] & TSR_FIS
                  && env->spr[SPR_BOOKE_TCR] & TCR_FIE));
 }
@@ -154,9 +156,10 @@ static void booke_update_fixed_timer(CPUPPCState         *env,
 static void booke_decr_cb(void *opaque)
 {
     CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
 
     env->spr[SPR_BOOKE_TSR] |= TSR_DIS;
-    booke_update_irq(env);
+    booke_update_irq(cpu);
 
     if (env->spr[SPR_BOOKE_TCR] & TCR_ARE) {
         /* Auto Reload */
@@ -166,16 +169,18 @@ static void booke_decr_cb(void *opaque)
 
 static void booke_fit_cb(void *opaque)
 {
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
     env = opaque;
+    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     booke_timer = tb_env->opaque;
     env->spr[SPR_BOOKE_TSR] |= TSR_FIS;
 
-    booke_update_irq(env);
+    booke_update_irq(cpu);
 
     booke_update_fixed_timer(env,
                              booke_get_fit_target(env, tb_env),
@@ -185,17 +190,19 @@ static void booke_fit_cb(void *opaque)
 
 static void booke_wdt_cb(void *opaque)
 {
+    PowerPCCPU *cpu;
     CPUPPCState *env;
     ppc_tb_t *tb_env;
     booke_timer_t *booke_timer;
 
     env = opaque;
+    cpu = ppc_env_get_cpu(env);
     tb_env = env->tb_env;
     booke_timer = tb_env->opaque;
 
     /* TODO: There's lots of complicated stuff to do here */
 
-    booke_update_irq(env);
+    booke_update_irq(cpu);
 
     booke_update_fixed_timer(env,
                              booke_get_wdt_target(env, tb_env),
@@ -205,19 +212,22 @@ static void booke_wdt_cb(void *opaque)
 
 void store_booke_tsr(CPUPPCState *env, target_ulong val)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
+
     env->spr[SPR_BOOKE_TSR] &= ~val;
-    booke_update_irq(env);
+    booke_update_irq(cpu);
 }
 
 void store_booke_tcr(CPUPPCState *env, target_ulong val)
 {
+    PowerPCCPU *cpu = ppc_env_get_cpu(env);
     ppc_tb_t *tb_env = env->tb_env;
     booke_timer_t *booke_timer = tb_env->opaque;
 
     tb_env = env->tb_env;
     env->spr[SPR_BOOKE_TCR] = val;
 
-    booke_update_irq(env);
+    booke_update_irq(cpu);
 
     booke_update_fixed_timer(env,
                              booke_get_fit_target(env, tb_env),
commit 1bc22652d62f862a5def54f939e87fdb7a5593ae
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Oct 31 06:06:49 2012 +0100

    kvm: Pass CPUState to kvm_vcpu_ioctl()
    
    Adapt helper functions to pass X86CPU / PowerPCCPU / S390CPU.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
index 8b65d51..c4d74ee 100644
--- a/hw/kvm/apic.c
+++ b/hw/kvm/apic.c
@@ -104,7 +104,7 @@ static void kvm_apic_enable_tpr_reporting(APICCommonState *s, bool enable)
         .enabled = enable
     };
 
-    kvm_vcpu_ioctl(&s->cpu->env, KVM_TPR_ACCESS_REPORTING, &ctl);
+    kvm_vcpu_ioctl(CPU(s->cpu), KVM_TPR_ACCESS_REPORTING, &ctl);
 }
 
 static void kvm_apic_vapic_base_update(APICCommonState *s)
@@ -114,7 +114,7 @@ static void kvm_apic_vapic_base_update(APICCommonState *s)
     };
     int ret;
 
-    ret = kvm_vcpu_ioctl(&s->cpu->env, KVM_SET_VAPIC_ADDR, &vapid_addr);
+    ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_VAPIC_ADDR, &vapid_addr);
     if (ret < 0) {
         fprintf(stderr, "KVM: setting VAPIC address failed (%s)\n",
                 strerror(-ret));
@@ -125,15 +125,15 @@ static void kvm_apic_vapic_base_update(APICCommonState *s)
 static void do_inject_external_nmi(void *data)
 {
     APICCommonState *s = data;
-    CPUX86State *env = &s->cpu->env;
+    CPUState *cpu = CPU(s->cpu);
     uint32_t lvt;
     int ret;
 
-    cpu_synchronize_state(env);
+    cpu_synchronize_state(&s->cpu->env);
 
     lvt = s->lvt[APIC_LVT_LINT1];
     if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) {
-        ret = kvm_vcpu_ioctl(env, KVM_NMI);
+        ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
         if (ret < 0) {
             fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
                     strerror(-ret));
diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
index 824b978..4cf62f6 100644
--- a/hw/kvm/clock.c
+++ b/hw/kvm/clock.c
@@ -76,7 +76,7 @@ static void kvmclock_vm_state_change(void *opaque, int running,
             return;
         }
         for (penv = first_cpu; penv != NULL; penv = penv->next_cpu) {
-            ret = kvm_vcpu_ioctl(penv, KVM_KVMCLOCK_CTRL, 0);
+            ret = kvm_vcpu_ioctl(ENV_GET_CPU(penv), KVM_KVMCLOCK_CTRL, 0);
             if (ret) {
                 if (ret != -EINVAL) {
                     fprintf(stderr, "%s: %s\n", __func__, strerror(-ret));
diff --git a/hw/ppc.c b/hw/ppc.c
index 11fd199..e99a93d 100644
--- a/hw/ppc.c
+++ b/hw/ppc.c
@@ -65,7 +65,7 @@ void ppc_set_irq(CPUPPCState *env, int n_IRQ, int level)
 
     if (old_pending != env->pending_interrupts) {
 #ifdef CONFIG_KVM
-        kvmppc_set_interrupt(env, n_IRQ, level);
+        kvmppc_set_interrupt(ppc_env_get_cpu(env), n_IRQ, level);
 #endif
     }
 
diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index e0ac2d1..716028d 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -111,10 +111,12 @@ VirtIOS390Bus *s390_virtio_bus_init(ram_addr_t *ram_size)
     return bus;
 }
 
-static void s390_virtio_irq(CPUS390XState *env, int config_change, uint64_t token)
+static void s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token)
 {
+    CPUS390XState *env = &cpu->env;
+
     if (kvm_enabled()) {
-        kvm_s390_virtio_irq(env, config_change, token);
+        kvm_s390_virtio_irq(cpu, config_change, token);
     } else {
         cpu_inject_ext(env, VIRTIO_EXT_CODE, config_change, token);
     }
@@ -143,8 +145,7 @@ static int s390_virtio_device_init(VirtIOS390Device *dev, VirtIODevice *vdev)
     s390_virtio_reset_idx(dev);
     if (dev->qdev.hotplugged) {
         S390CPU *cpu = s390_cpu_addr2state(0);
-        CPUS390XState *env = &cpu->env;
-        s390_virtio_irq(env, VIRTIO_PARAM_DEV_ADD, dev->dev_offs);
+        s390_virtio_irq(cpu, VIRTIO_PARAM_DEV_ADD, dev->dev_offs);
     }
 
     return 0;
@@ -369,9 +370,8 @@ static void virtio_s390_notify(void *opaque, uint16_t vector)
     VirtIOS390Device *dev = (VirtIOS390Device*)opaque;
     uint64_t token = s390_virtio_device_vq_token(dev, vector);
     S390CPU *cpu = s390_cpu_addr2state(0);
-    CPUS390XState *env = &cpu->env;
 
-    s390_virtio_irq(env, 0, token);
+    s390_virtio_irq(cpu, 0, token);
 }
 
 static unsigned virtio_s390_get_features(void *opaque)
diff --git a/hw/spapr.c b/hw/spapr.c
index 504d0fc..341f0b9 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -797,7 +797,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 
         /* Tell KVM that we're in PAPR mode */
         if (kvm_enabled()) {
-            kvmppc_set_papr(env);
+            kvmppc_set_papr(cpu);
         }
 
         qemu_register_reset(spapr_cpu_reset, cpu);
diff --git a/kvm-all.c b/kvm-all.c
index 792cdf1..5f1d1fe 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1557,7 +1557,7 @@ int kvm_cpu_exec(CPUArchState *env)
         }
         qemu_mutex_unlock_iothread();
 
-        run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
+        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
 
         qemu_mutex_lock_iothread();
         kvm_arch_post_run(cpu, run);
@@ -1658,9 +1658,8 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
     return ret;
 }
 
-int kvm_vcpu_ioctl(CPUArchState *env, int type, ...)
+int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
     int ret;
     void *arg;
     va_list ap;
@@ -1791,9 +1790,9 @@ struct kvm_set_guest_debug_data {
 static void kvm_invoke_set_guest_debug(void *data)
 {
     struct kvm_set_guest_debug_data *dbg_data = data;
-    CPUArchState *env = dbg_data->env;
+    CPUState *cpu = ENV_GET_CPU(dbg_data->env);
 
-    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
+    dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
 }
 
 int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
@@ -1955,18 +1954,19 @@ void kvm_remove_all_breakpoints(CPUArchState *current_env)
 
 int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     struct kvm_signal_mask *sigmask;
     int r;
 
     if (!sigset) {
-        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
+        return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL);
     }
 
     sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
 
     sigmask->len = 8;
     memcpy(sigmask->sigset, sigset, sizeof(*sigset));
-    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
+    r = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, sigmask);
     g_free(sigmask);
 
     return r;
diff --git a/kvm.h b/kvm.h
index 61f00b7..a2375ff 100644
--- a/kvm.h
+++ b/kvm.h
@@ -152,7 +152,7 @@ int kvm_ioctl(KVMState *s, int type, ...);
 
 int kvm_vm_ioctl(KVMState *s, int type, ...);
 
-int kvm_vcpu_ioctl(CPUArchState *env, int type, ...);
+int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
 
 /* Arch specific hooks */
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 80cacf3..b2efa1e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -368,8 +368,10 @@ int kvm_arch_on_sigbus(int code, void *addr)
     return 0;
 }
 
-static int kvm_inject_mce_oldstyle(CPUX86State *env)
+static int kvm_inject_mce_oldstyle(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
+
     if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) {
         unsigned int bank, bank_num = env->mcg_cap & 0xff;
         struct kvm_x86_mce mce;
@@ -393,7 +395,7 @@ static int kvm_inject_mce_oldstyle(CPUX86State *env)
         mce.addr = env->mce_banks[bank * 4 + 2];
         mce.misc = env->mce_banks[bank * 4 + 3];
 
-        return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, &mce);
+        return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce);
     }
     return 0;
 }
@@ -593,7 +595,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         }
         mcg_cap &= MCE_CAP_DEF;
         mcg_cap |= banks;
-        ret = kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, &mcg_cap);
+        ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &mcg_cap);
         if (ret < 0) {
             fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret));
             return ret;
@@ -605,14 +607,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
     qemu_add_vm_change_state_handler(cpu_update_state, env);
 
     cpuid_data.cpuid.padding = 0;
-    r = kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
+    r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
     if (r) {
         return r;
     }
 
     r = kvm_check_extension(env->kvm_state, KVM_CAP_TSC_CONTROL);
     if (r && env->tsc_khz) {
-        r = kvm_vcpu_ioctl(env, KVM_SET_TSC_KHZ, env->tsc_khz);
+        r = kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz);
         if (r < 0) {
             fprintf(stderr, "KVM_SET_TSC_KHZ failed\n");
             return r;
@@ -820,13 +822,14 @@ static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
     }
 }
 
-static int kvm_getput_regs(CPUX86State *env, int set)
+static int kvm_getput_regs(X86CPU *cpu, int set)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_regs regs;
     int ret = 0;
 
     if (!set) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, &regs);
         if (ret < 0) {
             return ret;
         }
@@ -855,14 +858,15 @@ static int kvm_getput_regs(CPUX86State *env, int set)
     kvm_getput_reg(&regs.rip, &env->eip, set);
 
     if (set) {
-        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, &regs);
     }
 
     return ret;
 }
 
-static int kvm_put_fpu(CPUX86State *env)
+static int kvm_put_fpu(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_fpu fpu;
     int i;
 
@@ -880,7 +884,7 @@ static int kvm_put_fpu(CPUX86State *env)
     memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
     fpu.mxcsr = env->mxcsr;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
 }
 
 #define XSAVE_FCW_FSW     0
@@ -893,14 +897,15 @@ static int kvm_put_fpu(CPUX86State *env)
 #define XSAVE_XSTATE_BV   128
 #define XSAVE_YMMH_SPACE  144
 
-static int kvm_put_xsave(CPUX86State *env)
+static int kvm_put_xsave(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
     int i, r;
 
     if (!kvm_has_xsave()) {
-        return kvm_put_fpu(env);
+        return kvm_put_fpu(cpu);
     }
 
     memset(xsave, 0, sizeof(struct kvm_xsave));
@@ -923,12 +928,13 @@ static int kvm_put_xsave(CPUX86State *env)
     *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
     memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
             sizeof env->ymmh_regs);
-    r = kvm_vcpu_ioctl(env, KVM_SET_XSAVE, xsave);
+    r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
     return r;
 }
 
-static int kvm_put_xcrs(CPUX86State *env)
+static int kvm_put_xcrs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_xcrs xcrs;
 
     if (!kvm_has_xcrs()) {
@@ -939,11 +945,12 @@ static int kvm_put_xcrs(CPUX86State *env)
     xcrs.flags = 0;
     xcrs.xcrs[0].xcr = 0;
     xcrs.xcrs[0].value = env->xcr0;
-    return kvm_vcpu_ioctl(env, KVM_SET_XCRS, &xcrs);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs);
 }
 
-static int kvm_put_sregs(CPUX86State *env)
+static int kvm_put_sregs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_sregs sregs;
 
     memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
@@ -988,7 +995,7 @@ static int kvm_put_sregs(CPUX86State *env)
 
     sregs.efer = env->efer;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 }
 
 static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
@@ -998,8 +1005,9 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
     entry->data = value;
 }
 
-static int kvm_put_msrs(CPUX86State *env, int level)
+static int kvm_put_msrs(X86CPU *cpu, int level)
 {
+    CPUX86State *env = &cpu->env;
     struct {
         struct kvm_msrs info;
         struct kvm_msr_entry entries[100];
@@ -1080,17 +1088,18 @@ static int kvm_put_msrs(CPUX86State *env, int level)
 
     msr_data.info.nmsrs = n;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
 
 }
 
 
-static int kvm_get_fpu(CPUX86State *env)
+static int kvm_get_fpu(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_fpu fpu;
     int i, ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu);
     if (ret < 0) {
         return ret;
     }
@@ -1111,17 +1120,18 @@ static int kvm_get_fpu(CPUX86State *env)
     return 0;
 }
 
-static int kvm_get_xsave(CPUX86State *env)
+static int kvm_get_xsave(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     int ret, i;
     uint16_t cwd, swd, twd;
 
     if (!kvm_has_xsave()) {
-        return kvm_get_fpu(env);
+        return kvm_get_fpu(cpu);
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_XSAVE, xsave);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
     if (ret < 0) {
         return ret;
     }
@@ -1149,8 +1159,9 @@ static int kvm_get_xsave(CPUX86State *env)
     return 0;
 }
 
-static int kvm_get_xcrs(CPUX86State *env)
+static int kvm_get_xcrs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     int i, ret;
     struct kvm_xcrs xcrs;
 
@@ -1158,7 +1169,7 @@ static int kvm_get_xcrs(CPUX86State *env)
         return 0;
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_XCRS, &xcrs);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs);
     if (ret < 0) {
         return ret;
     }
@@ -1173,13 +1184,14 @@ static int kvm_get_xcrs(CPUX86State *env)
     return 0;
 }
 
-static int kvm_get_sregs(CPUX86State *env)
+static int kvm_get_sregs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_sregs sregs;
     uint32_t hflags;
     int bit, i, ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
     if (ret < 0) {
         return ret;
     }
@@ -1257,8 +1269,9 @@ static int kvm_get_sregs(CPUX86State *env)
     return 0;
 }
 
-static int kvm_get_msrs(CPUX86State *env)
+static int kvm_get_msrs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct {
         struct kvm_msrs info;
         struct kvm_msr_entry entries[100];
@@ -1315,7 +1328,7 @@ static int kvm_get_msrs(CPUX86State *env)
     }
 
     msr_data.info.nmsrs = n;
-    ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
     if (ret < 0) {
         return ret;
     }
@@ -1393,11 +1406,11 @@ static int kvm_get_msrs(CPUX86State *env)
     return 0;
 }
 
-static int kvm_put_mp_state(CPUX86State *env)
+static int kvm_put_mp_state(X86CPU *cpu)
 {
-    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
+    struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state };
 
-    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
 }
 
 static int kvm_get_mp_state(X86CPU *cpu)
@@ -1406,7 +1419,7 @@ static int kvm_get_mp_state(X86CPU *cpu)
     struct kvm_mp_state mp_state;
     int ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
     if (ret < 0) {
         return ret;
     }
@@ -1417,14 +1430,15 @@ static int kvm_get_mp_state(X86CPU *cpu)
     return 0;
 }
 
-static int kvm_get_apic(CPUX86State *env)
+static int kvm_get_apic(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     DeviceState *apic = env->apic_state;
     struct kvm_lapic_state kapic;
     int ret;
 
     if (apic && kvm_irqchip_in_kernel()) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, &kapic);
+        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic);
         if (ret < 0) {
             return ret;
         }
@@ -1434,21 +1448,23 @@ static int kvm_get_apic(CPUX86State *env)
     return 0;
 }
 
-static int kvm_put_apic(CPUX86State *env)
+static int kvm_put_apic(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     DeviceState *apic = env->apic_state;
     struct kvm_lapic_state kapic;
 
     if (apic && kvm_irqchip_in_kernel()) {
         kvm_put_apic_state(apic, &kapic);
 
-        return kvm_vcpu_ioctl(env, KVM_SET_LAPIC, &kapic);
+        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_LAPIC, &kapic);
     }
     return 0;
 }
 
-static int kvm_put_vcpu_events(CPUX86State *env, int level)
+static int kvm_put_vcpu_events(X86CPU *cpu, int level)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_vcpu_events events;
 
     if (!kvm_has_vcpu_events()) {
@@ -1478,11 +1494,12 @@ static int kvm_put_vcpu_events(CPUX86State *env, int level)
             KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR;
     }
 
-    return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
 }
 
-static int kvm_get_vcpu_events(CPUX86State *env)
+static int kvm_get_vcpu_events(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_vcpu_events events;
     int ret;
 
@@ -1490,7 +1507,7 @@ static int kvm_get_vcpu_events(CPUX86State *env)
         return 0;
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_VCPU_EVENTS, &events);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
     if (ret < 0) {
        return ret;
     }
@@ -1516,8 +1533,9 @@ static int kvm_get_vcpu_events(CPUX86State *env)
     return 0;
 }
 
-static int kvm_guest_debug_workarounds(CPUX86State *env)
+static int kvm_guest_debug_workarounds(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     int ret = 0;
     unsigned long reinject_trap = 0;
 
@@ -1545,8 +1563,9 @@ static int kvm_guest_debug_workarounds(CPUX86State *env)
     return ret;
 }
 
-static int kvm_put_debugregs(CPUX86State *env)
+static int kvm_put_debugregs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_debugregs dbgregs;
     int i;
 
@@ -1561,11 +1580,12 @@ static int kvm_put_debugregs(CPUX86State *env)
     dbgregs.dr7 = env->dr[7];
     dbgregs.flags = 0;
 
-    return kvm_vcpu_ioctl(env, KVM_SET_DEBUGREGS, &dbgregs);
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs);
 }
 
-static int kvm_get_debugregs(CPUX86State *env)
+static int kvm_get_debugregs(X86CPU *cpu)
 {
+    CPUX86State *env = &cpu->env;
     struct kvm_debugregs dbgregs;
     int i, ret;
 
@@ -1573,7 +1593,7 @@ static int kvm_get_debugregs(CPUX86State *env)
         return 0;
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_DEBUGREGS, &dbgregs);
+    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs);
     if (ret < 0) {
         return ret;
     }
@@ -1589,56 +1609,55 @@ static int kvm_get_debugregs(CPUX86State *env)
 int kvm_arch_put_registers(CPUState *cpu, int level)
 {
     X86CPU *x86_cpu = X86_CPU(cpu);
-    CPUX86State *env = &x86_cpu->env;
     int ret;
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
-    ret = kvm_getput_regs(env, 1);
+    ret = kvm_getput_regs(x86_cpu, 1);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_xsave(env);
+    ret = kvm_put_xsave(x86_cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_xcrs(env);
+    ret = kvm_put_xcrs(x86_cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_sregs(env);
+    ret = kvm_put_sregs(x86_cpu);
     if (ret < 0) {
         return ret;
     }
     /* must be before kvm_put_msrs */
-    ret = kvm_inject_mce_oldstyle(env);
+    ret = kvm_inject_mce_oldstyle(x86_cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_msrs(env, level);
+    ret = kvm_put_msrs(x86_cpu, level);
     if (ret < 0) {
         return ret;
     }
     if (level >= KVM_PUT_RESET_STATE) {
-        ret = kvm_put_mp_state(env);
+        ret = kvm_put_mp_state(x86_cpu);
         if (ret < 0) {
             return ret;
         }
-        ret = kvm_put_apic(env);
+        ret = kvm_put_apic(x86_cpu);
         if (ret < 0) {
             return ret;
         }
     }
-    ret = kvm_put_vcpu_events(env, level);
+    ret = kvm_put_vcpu_events(x86_cpu, level);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_put_debugregs(env);
+    ret = kvm_put_debugregs(x86_cpu);
     if (ret < 0) {
         return ret;
     }
     /* must be last */
-    ret = kvm_guest_debug_workarounds(env);
+    ret = kvm_guest_debug_workarounds(x86_cpu);
     if (ret < 0) {
         return ret;
     }
@@ -1648,28 +1667,27 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
 int kvm_arch_get_registers(CPUState *cs)
 {
     X86CPU *cpu = X86_CPU(cs);
-    CPUX86State *env = &cpu->env;
     int ret;
 
     assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));
 
-    ret = kvm_getput_regs(env, 0);
+    ret = kvm_getput_regs(cpu, 0);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_xsave(env);
+    ret = kvm_get_xsave(cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_xcrs(env);
+    ret = kvm_get_xcrs(cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_sregs(env);
+    ret = kvm_get_sregs(cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_msrs(env);
+    ret = kvm_get_msrs(cpu);
     if (ret < 0) {
         return ret;
     }
@@ -1677,15 +1695,15 @@ int kvm_arch_get_registers(CPUState *cs)
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_apic(env);
+    ret = kvm_get_apic(cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_vcpu_events(env);
+    ret = kvm_get_vcpu_events(cpu);
     if (ret < 0) {
         return ret;
     }
-    ret = kvm_get_debugregs(env);
+    ret = kvm_get_debugregs(cpu);
     if (ret < 0) {
         return ret;
     }
@@ -1702,7 +1720,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
     if (env->interrupt_request & CPU_INTERRUPT_NMI) {
         env->interrupt_request &= ~CPU_INTERRUPT_NMI;
         DPRINTF("injected NMI\n");
-        ret = kvm_vcpu_ioctl(env, KVM_NMI);
+        ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
         if (ret < 0) {
             fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
                     strerror(-ret));
@@ -1730,7 +1748,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 
                 intr.irq = irq;
                 DPRINTF("injected interrupt %d\n", irq);
-                ret = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+                ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
                 if (ret < 0) {
                     fprintf(stderr,
                             "KVM: injection failed, interrupt lost (%s)\n",
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 8a59b70..ad5bc66 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -99,8 +99,10 @@ int kvm_arch_init(KVMState *s)
     return 0;
 }
 
-static int kvm_arch_sync_sregs(CPUPPCState *cenv)
+static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 {
+    CPUPPCState *cenv = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_sregs sregs;
     int ret;
 
@@ -117,18 +119,20 @@ static int kvm_arch_sync_sregs(CPUPPCState *cenv)
         }
     }
 
-    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
     if (ret) {
         return ret;
     }
 
     sregs.pvr = cenv->spr[SPR_PVR];
-    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
+    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 }
 
 /* Set up a shared TLB array with KVM */
-static int kvm_booke206_tlb_init(CPUPPCState *env)
+static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_book3e_206_tlb_params params = {};
     struct kvm_config_tlb cfg = {};
     struct kvm_enable_cap encap = {};
@@ -161,7 +165,7 @@ static int kvm_booke206_tlb_init(CPUPPCState *env)
     encap.cap = KVM_CAP_SW_TLB;
     encap.args[0] = (uintptr_t)&cfg;
 
-    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
+    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
     if (ret < 0) {
         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                 __func__, strerror(-ret));
@@ -385,7 +389,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     kvm_fixup_page_sizes(cenv);
 
     /* Synchronize sregs with kvm */
-    ret = kvm_arch_sync_sregs(cenv);
+    ret = kvm_arch_sync_sregs(cpu);
     if (ret) {
         return ret;
     }
@@ -395,7 +399,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     /* Some targets support access to KVM's guest TLB. */
     switch (cenv->mmu_model) {
     case POWERPC_MMU_BOOKE206:
-        ret = kvm_booke206_tlb_init(cenv);
+        ret = kvm_booke206_tlb_init(cpu);
         break;
     default:
         break;
@@ -408,8 +412,10 @@ void kvm_arch_reset_vcpu(CPUState *cpu)
 {
 }
 
-static void kvm_sw_tlb_put(CPUPPCState *env)
+static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_dirty_tlb dirty_tlb;
     unsigned char *bitmap;
     int ret;
@@ -424,7 +430,7 @@ static void kvm_sw_tlb_put(CPUPPCState *env)
     dirty_tlb.bitmap = (uintptr_t)bitmap;
     dirty_tlb.num_dirty = env->nb_tlb;
 
-    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
+    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
     if (ret) {
         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                 __func__, strerror(-ret));
@@ -441,9 +447,10 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     int ret;
     int i;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
-    if (ret < 0)
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
+    if (ret < 0) {
         return ret;
+    }
 
     regs.ctr = env->ctr;
     regs.lr  = env->lr;
@@ -468,12 +475,12 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     for (i = 0;i < 32; i++)
         regs.gpr[i] = env->gpr[i];
 
-    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
     if (ret < 0)
         return ret;
 
     if (env->tlb_dirty) {
-        kvm_sw_tlb_put(env);
+        kvm_sw_tlb_put(cpu);
         env->tlb_dirty = false;
     }
 
@@ -506,7 +513,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
                 | env->IBAT[1][i];
         }
 
-        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
         if (ret) {
             return ret;
         }
@@ -519,7 +526,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
             .addr = (uintptr_t) &hior,
         };
 
-        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
         if (ret) {
             return ret;
         }
@@ -537,7 +544,7 @@ int kvm_arch_get_registers(CPUState *cs)
     uint32_t cr;
     int i, ret;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
     if (ret < 0)
         return ret;
 
@@ -571,7 +578,7 @@ int kvm_arch_get_registers(CPUState *cs)
         env->gpr[i] = regs.gpr[i];
 
     if (cap_booke_sregs) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -675,7 +682,7 @@ int kvm_arch_get_registers(CPUState *cs)
     }
 
     if (cap_segstate) {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -707,7 +714,7 @@ int kvm_arch_get_registers(CPUState *cs)
     return 0;
 }
 
-int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
+int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
 {
     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 
@@ -719,7 +726,7 @@ int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
         return 0;
     }
 
-    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
+    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
 
     return 0;
 }
@@ -753,7 +760,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
         irq = KVM_INTERRUPT_SET;
 
         dprintf("injected interrupt %d\n", irq);
-        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
+        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
         if (r < 0)
             printf("cpu %d fail inject %x\n", env->cpu_index, irq);
 
@@ -1007,13 +1014,15 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
     return 0;
 }
 
-void kvmppc_set_papr(CPUPPCState *env)
+void kvmppc_set_papr(PowerPCCPU *cpu)
 {
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_enable_cap cap = {};
     int ret;
 
     cap.cap = KVM_CAP_PPC_PAPR;
-    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
+    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
 
     if (ret) {
         cpu_abort(env, "This KVM version does not support PAPR\n");
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index baad6eb..369c7fe 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -20,8 +20,8 @@ uint64_t kvmppc_get_clockfreq(void);
 uint32_t kvmppc_get_vmx(void);
 uint32_t kvmppc_get_dfp(void);
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
-int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level);
-void kvmppc_set_papr(CPUPPCState *env);
+int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
+void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_smt_threads(void);
 #ifndef CONFIG_USER_ONLY
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
@@ -65,12 +65,12 @@ static inline int kvmppc_read_segment_page_sizes(uint32_t *prop, int maxcells)
     return -1;
 }
 
-static inline int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
+static inline int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
 {
     return -1;
 }
 
-static inline void kvmppc_set_papr(CPUPPCState *env)
+static inline void kvmppc_set_papr(PowerPCCPU *cpu)
 {
 }
 
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 0f9a1f7..7e7e3b7 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -296,21 +296,21 @@ void s390x_cpu_timer(void *opaque);
 int s390_virtio_hypercall(CPUS390XState *env, uint64_t mem, uint64_t hypercall);
 
 #ifdef CONFIG_KVM
-void kvm_s390_interrupt(CPUS390XState *env, int type, uint32_t code);
-void kvm_s390_virtio_irq(CPUS390XState *env, int config_change, uint64_t token);
-void kvm_s390_interrupt_internal(CPUS390XState *env, int type, uint32_t parm,
+void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code);
+void kvm_s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token);
+void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
                                  uint64_t parm64, int vm);
 #else
-static inline void kvm_s390_interrupt(CPUS390XState *env, int type, uint32_t code)
+static inline void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code)
 {
 }
 
-static inline void kvm_s390_virtio_irq(CPUS390XState *env, int config_change,
+static inline void kvm_s390_virtio_irq(S390CPU *cpu, int config_change,
                                        uint64_t token)
 {
 }
 
-static inline void kvm_s390_interrupt_internal(CPUS390XState *env, int type,
+static inline void kvm_s390_interrupt_internal(S390CPU *cpu, int type,
                                                uint32_t parm, uint64_t parm64,
                                                int vm)
 {
diff --git a/target-s390x/interrupt.c b/target-s390x/interrupt.c
index c1b034f..97487bd 100644
--- a/target-s390x/interrupt.c
+++ b/target-s390x/interrupt.c
@@ -19,7 +19,8 @@ void s390_sclp_extint(uint32_t parm)
 
     if (kvm_enabled()) {
 #ifdef CONFIG_KVM
-        kvm_s390_interrupt_internal(env, KVM_S390_INT_SERVICE, parm, 0, 1);
+        kvm_s390_interrupt_internal(dummy_cpu, KVM_S390_INT_SERVICE, parm,
+                                    0, 1);
 #endif
     } else {
         env->psw.addr += 4;
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index d4e6ab2..5422678 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -74,10 +74,9 @@ int kvm_arch_init(KVMState *s)
 
 int kvm_arch_init_vcpu(CPUState *cpu)
 {
-    CPUS390XState *env = &S390_CPU(cpu)->env;
     int ret = 0;
 
-    if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL) < 0) {
+    if (kvm_vcpu_ioctl(cpu, KVM_S390_INITIAL_RESET, NULL) < 0) {
         perror("cannot init reset vcpu");
     }
 
@@ -111,7 +110,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         for (i = 0; i < 16; i++) {
             regs.gprs[i] = env->regs[i];
         }
-        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
         if (ret < 0) {
             return ret;
         }
@@ -136,7 +135,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
             sregs.acrs[i] = env->aregs[i];
             sregs.crs[i] = env->cregs[i];
         }
-        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -171,7 +170,7 @@ int kvm_arch_get_registers(CPUState *cs)
             env->regs[i] = env->kvm_run->s.regs.gprs[i];
         }
     } else {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
         if (ret < 0) {
             return ret;
         }
@@ -189,7 +188,7 @@ int kvm_arch_get_registers(CPUState *cs)
             env->cregs[i] = env->kvm_run->s.regs.crs[i];
         }
     } else {
-        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
         if (ret < 0) {
             return ret;
         }
@@ -289,9 +288,11 @@ int kvm_arch_process_async_events(CPUState *cs)
     return cpu->env.halted;
 }
 
-void kvm_s390_interrupt_internal(CPUS390XState *env, int type, uint32_t parm,
+void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
                                  uint64_t parm64, int vm)
 {
+    CPUS390XState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
     struct kvm_s390_interrupt kvmint;
     int r;
 
@@ -306,7 +307,7 @@ void kvm_s390_interrupt_internal(CPUS390XState *env, int type, uint32_t parm,
     if (vm) {
         r = kvm_vm_ioctl(env->kvm_state, KVM_S390_INTERRUPT, &kvmint);
     } else {
-        r = kvm_vcpu_ioctl(env, KVM_S390_INTERRUPT, &kvmint);
+        r = kvm_vcpu_ioctl(cs, KVM_S390_INTERRUPT, &kvmint);
     }
 
     if (r < 0) {
@@ -315,20 +316,20 @@ void kvm_s390_interrupt_internal(CPUS390XState *env, int type, uint32_t parm,
     }
 }
 
-void kvm_s390_virtio_irq(CPUS390XState *env, int config_change, uint64_t token)
+void kvm_s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token)
 {
-    kvm_s390_interrupt_internal(env, KVM_S390_INT_VIRTIO, config_change,
+    kvm_s390_interrupt_internal(cpu, KVM_S390_INT_VIRTIO, config_change,
                                 token, 1);
 }
 
-void kvm_s390_interrupt(CPUS390XState *env, int type, uint32_t code)
+void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code)
 {
-    kvm_s390_interrupt_internal(env, type, code, 0, 0);
+    kvm_s390_interrupt_internal(cpu, type, code, 0, 0);
 }
 
-static void enter_pgmcheck(CPUS390XState *env, uint16_t code)
+static void enter_pgmcheck(S390CPU *cpu, uint16_t code)
 {
-    kvm_s390_interrupt(env, KVM_S390_PROGRAM_INT, code);
+    kvm_s390_interrupt(cpu, KVM_S390_PROGRAM_INT, code);
 }
 
 static inline void setcc(CPUS390XState *env, uint64_t cc)
@@ -340,9 +341,10 @@ static inline void setcc(CPUS390XState *env, uint64_t cc)
     env->psw.mask |= (cc & 3) << 44;
 }
 
-static int kvm_sclp_service_call(CPUS390XState *env, struct kvm_run *run,
+static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run,
                                  uint16_t ipbh0)
 {
+    CPUS390XState *env = &cpu->env;
     uint32_t sccb;
     uint64_t code;
     int r = 0;
@@ -353,14 +355,14 @@ static int kvm_sclp_service_call(CPUS390XState *env, struct kvm_run *run,
 
     r = sclp_service_call(sccb, code);
     if (r < 0) {
-        enter_pgmcheck(env, -r);
+        enter_pgmcheck(cpu, -r);
     }
     setcc(env, r);
 
     return 0;
 }
 
-static int handle_priv(CPUS390XState *env, struct kvm_run *run, uint8_t ipa1)
+static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
     int r = 0;
     uint16_t ipbh0 = (run->s390_sieic.ipb & 0xffff0000) >> 16;
@@ -368,7 +370,7 @@ static int handle_priv(CPUS390XState *env, struct kvm_run *run, uint8_t ipa1)
     dprintf("KVM: PRIV: %d\n", ipa1);
     switch (ipa1) {
         case PRIV_SCLP_CALL:
-            r = kvm_sclp_service_call(env, run, ipbh0);
+            r = kvm_sclp_service_call(cpu, run, ipbh0);
             break;
         default:
             dprintf("KVM: unknown PRIV: 0x%x\n", ipa1);
@@ -411,7 +413,7 @@ static int s390_cpu_restart(S390CPU *cpu)
 {
     CPUS390XState *env = &cpu->env;
 
-    kvm_s390_interrupt(env, KVM_S390_RESTART, 0);
+    kvm_s390_interrupt(cpu, KVM_S390_RESTART, 0);
     s390_add_running_cpu(env);
     qemu_cpu_kick(CPU(cpu));
     dprintf("DONE: SIGP cpu restart: %p\n", env);
@@ -425,12 +427,13 @@ static int s390_store_status(CPUS390XState *env, uint32_t parameter)
     return -1;
 }
 
-static int s390_cpu_initial_reset(CPUS390XState *env)
+static int s390_cpu_initial_reset(S390CPU *cpu)
 {
+    CPUS390XState *env = &cpu->env;
     int i;
 
     s390_del_running_cpu(env);
-    if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL) < 0) {
+    if (kvm_vcpu_ioctl(CPU(cpu), KVM_S390_INITIAL_RESET, NULL) < 0) {
         perror("cannot init reset vcpu");
     }
 
@@ -489,7 +492,7 @@ static int handle_sigp(CPUS390XState *env, struct kvm_run *run, uint8_t ipa1)
             /* make the caller panic */
             return -1;
         case SIGP_INITIAL_CPU_RESET:
-            r = s390_cpu_initial_reset(target_env);
+            r = s390_cpu_initial_reset(target_cpu);
             break;
         default:
             fprintf(stderr, "KVM: unknown SIGP: 0x%x\n", order_code);
@@ -501,8 +504,9 @@ out:
     return 0;
 }
 
-static int handle_instruction(CPUS390XState *env, struct kvm_run *run)
+static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
 {
+    CPUS390XState *env = &cpu->env;
     unsigned int ipa0 = (run->s390_sieic.ipa & 0xff00);
     uint8_t ipa1 = run->s390_sieic.ipa & 0x00ff;
     int ipb_code = (run->s390_sieic.ipb & 0x0fff0000) >> 16;
@@ -511,7 +515,7 @@ static int handle_instruction(CPUS390XState *env, struct kvm_run *run)
     dprintf("handle_instruction 0x%x 0x%x\n", run->s390_sieic.ipa, run->s390_sieic.ipb);
     switch (ipa0) {
         case IPA0_PRIV:
-            r = handle_priv(env, run, ipa1);
+            r = handle_priv(cpu, run, ipa1);
             break;
         case IPA0_DIAG:
             r = handle_diag(env, run, ipb_code);
@@ -522,7 +526,7 @@ static int handle_instruction(CPUS390XState *env, struct kvm_run *run)
     }
 
     if (r < 0) {
-        enter_pgmcheck(env, 0x0001);
+        enter_pgmcheck(cpu, 0x0001);
     }
     return 0;
 }
@@ -533,8 +537,9 @@ static bool is_special_wait_psw(CPUS390XState *env)
     return env->kvm_run->psw_addr == 0xfffUL;
 }
 
-static int handle_intercept(CPUS390XState *env)
+static int handle_intercept(S390CPU *cpu)
 {
+    CPUS390XState *env = &cpu->env;
     struct kvm_run *run = env->kvm_run;
     int icpt_code = run->s390_sieic.icptcode;
     int r = 0;
@@ -543,7 +548,7 @@ static int handle_intercept(CPUS390XState *env)
             (long)env->kvm_run->psw_addr);
     switch (icpt_code) {
         case ICPT_INSTRUCTION:
-            r = handle_instruction(env, run);
+            r = handle_instruction(cpu, run);
             break;
         case ICPT_WAITPSW:
             if (s390_del_running_cpu(env) == 0 &&
@@ -578,12 +583,11 @@ static int handle_intercept(CPUS390XState *env)
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     S390CPU *cpu = S390_CPU(cs);
-    CPUS390XState *env = &cpu->env;
     int ret = 0;
 
     switch (run->exit_reason) {
         case KVM_EXIT_S390_SIEIC:
-            ret = handle_intercept(env);
+            ret = handle_intercept(cpu);
             break;
         case KVM_EXIT_S390_RESET:
             qemu_system_reset_request();
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 38d8f2a..0834a19 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -57,7 +57,7 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilc)
 
     if (kvm_enabled()) {
 #ifdef CONFIG_KVM
-        kvm_s390_interrupt(env, KVM_S390_PROGRAM_INT, code);
+        kvm_s390_interrupt(s390_env_get_cpu(env), KVM_S390_PROGRAM_INT, code);
 #endif
     } else {
         env->int_pgm_code = code;
commit 20d695a9254c1b086a456d3b79a3c311236643ba
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Oct 31 06:57:49 2012 +0100

    kvm: Pass CPUState to kvm_arch_*
    
    Move kvm_vcpu_dirty field into CPUState to simplify things and change
    its type to bool while at it.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/cpu-defs.h b/cpu-defs.h
index 6373a80..a382e35 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -206,7 +206,6 @@ typedef struct CPUWatchpoint {
                                                                         \
     const char *cpu_model_str;                                          \
     struct KVMState *kvm_state;                                         \
-    struct kvm_run *kvm_run;                                            \
-    int kvm_vcpu_dirty;
+    struct kvm_run *kvm_run;
 
 #endif
diff --git a/include/qemu/cpu.h b/include/qemu/cpu.h
index 6049a20..b8f8dd1 100644
--- a/include/qemu/cpu.h
+++ b/include/qemu/cpu.h
@@ -80,6 +80,7 @@ struct CPUState {
 
 #if !defined(CONFIG_USER_ONLY)
     int kvm_fd;
+    bool kvm_vcpu_dirty;
 #endif
 
     /* TODO Move common fields from CPUArchState here. */
diff --git a/kvm-all.c b/kvm-all.c
index 8a00df7..792cdf1 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -209,9 +209,9 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
 
 static void kvm_reset_vcpu(void *opaque)
 {
-    CPUArchState *env = opaque;
+    CPUState *cpu = opaque;
 
-    kvm_arch_reset_vcpu(env);
+    kvm_arch_reset_vcpu(cpu);
 }
 
 int kvm_init_vcpu(CPUArchState *env)
@@ -231,7 +231,7 @@ int kvm_init_vcpu(CPUArchState *env)
 
     cpu->kvm_fd = ret;
     env->kvm_state = s;
-    env->kvm_vcpu_dirty = 1;
+    cpu->kvm_vcpu_dirty = true;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
@@ -253,10 +253,10 @@ int kvm_init_vcpu(CPUArchState *env)
             (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
     }
 
-    ret = kvm_arch_init_vcpu(env);
+    ret = kvm_arch_init_vcpu(cpu);
     if (ret == 0) {
-        qemu_register_reset(kvm_reset_vcpu, env);
-        kvm_arch_reset_vcpu(env);
+        qemu_register_reset(kvm_reset_vcpu, cpu);
+        kvm_arch_reset_vcpu(cpu);
     }
 err:
     return ret;
@@ -1438,6 +1438,8 @@ static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
 
 static int kvm_handle_internal_error(CPUArchState *env, struct kvm_run *run)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
+
     fprintf(stderr, "KVM internal error.");
     if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
         int i;
@@ -1452,7 +1454,7 @@ static int kvm_handle_internal_error(CPUArchState *env, struct kvm_run *run)
     }
     if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
         fprintf(stderr, "emulation failure\n");
-        if (!kvm_arch_stop_on_emulation_error(env)) {
+        if (!kvm_arch_stop_on_emulation_error(cpu)) {
             cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
             return EXCP_INTERRUPT;
         }
@@ -1489,13 +1491,13 @@ void kvm_flush_coalesced_mmio_buffer(void)
     s->coalesced_flush_in_progress = false;
 }
 
-static void do_kvm_cpu_synchronize_state(void *_env)
+static void do_kvm_cpu_synchronize_state(void *arg)
 {
-    CPUArchState *env = _env;
+    CPUState *cpu = arg;
 
-    if (!env->kvm_vcpu_dirty) {
-        kvm_arch_get_registers(env);
-        env->kvm_vcpu_dirty = 1;
+    if (!cpu->kvm_vcpu_dirty) {
+        kvm_arch_get_registers(cpu);
+        cpu->kvm_vcpu_dirty = true;
     }
 }
 
@@ -1503,42 +1505,47 @@ void kvm_cpu_synchronize_state(CPUArchState *env)
 {
     CPUState *cpu = ENV_GET_CPU(env);
 
-    if (!env->kvm_vcpu_dirty) {
-        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, env);
+    if (!cpu->kvm_vcpu_dirty) {
+        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
     }
 }
 
 void kvm_cpu_synchronize_post_reset(CPUArchState *env)
 {
-    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
-    env->kvm_vcpu_dirty = 0;
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
+    cpu->kvm_vcpu_dirty = false;
 }
 
 void kvm_cpu_synchronize_post_init(CPUArchState *env)
 {
-    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
-    env->kvm_vcpu_dirty = 0;
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
+    cpu->kvm_vcpu_dirty = false;
 }
 
 int kvm_cpu_exec(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     struct kvm_run *run = env->kvm_run;
     int ret, run_ret;
 
     DPRINTF("kvm_cpu_exec()\n");
 
-    if (kvm_arch_process_async_events(env)) {
+    if (kvm_arch_process_async_events(cpu)) {
         env->exit_request = 0;
         return EXCP_HLT;
     }
 
     do {
-        if (env->kvm_vcpu_dirty) {
-            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
-            env->kvm_vcpu_dirty = 0;
+        if (cpu->kvm_vcpu_dirty) {
+            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
+            cpu->kvm_vcpu_dirty = false;
         }
 
-        kvm_arch_pre_run(env, run);
+        kvm_arch_pre_run(cpu, run);
         if (env->exit_request) {
             DPRINTF("interrupt exit requested\n");
             /*
@@ -1553,7 +1560,7 @@ int kvm_cpu_exec(CPUArchState *env)
         run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
 
         qemu_mutex_lock_iothread();
-        kvm_arch_post_run(env, run);
+        kvm_arch_post_run(cpu, run);
 
         if (run_ret < 0) {
             if (run_ret == -EINTR || run_ret == -EAGAIN) {
@@ -1603,7 +1610,7 @@ int kvm_cpu_exec(CPUArchState *env)
             break;
         default:
             DPRINTF("kvm_arch_handle_exit\n");
-            ret = kvm_arch_handle_exit(env, run);
+            ret = kvm_arch_handle_exit(cpu, run);
             break;
         }
     } while (ret == 0);
@@ -1799,7 +1806,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
     if (env->singlestep_enabled) {
         data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
     }
-    kvm_arch_update_guest_debug(env, &data.dbg);
+    kvm_arch_update_guest_debug(cpu, &data.dbg);
     data.env = env;
 
     run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
@@ -1809,6 +1816,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
 int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
                           target_ulong len, int type)
 {
+    CPUState *current_cpu = ENV_GET_CPU(current_env);
     struct kvm_sw_breakpoint *bp;
     CPUArchState *env;
     int err;
@@ -1827,7 +1835,7 @@ int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
 
         bp->pc = addr;
         bp->use_count = 1;
-        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
+        err = kvm_arch_insert_sw_breakpoint(current_cpu, bp);
         if (err) {
             g_free(bp);
             return err;
@@ -1854,6 +1862,7 @@ int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
 int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
                           target_ulong len, int type)
 {
+    CPUState *current_cpu = ENV_GET_CPU(current_env);
     struct kvm_sw_breakpoint *bp;
     CPUArchState *env;
     int err;
@@ -1869,7 +1878,7 @@ int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
             return 0;
         }
 
-        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
+        err = kvm_arch_remove_sw_breakpoint(current_cpu, bp);
         if (err) {
             return err;
         }
@@ -1894,15 +1903,18 @@ int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
 
 void kvm_remove_all_breakpoints(CPUArchState *current_env)
 {
+    CPUState *current_cpu = ENV_GET_CPU(current_env);
     struct kvm_sw_breakpoint *bp, *next;
     KVMState *s = current_env->kvm_state;
     CPUArchState *env;
+    CPUState *cpu;
 
     QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
-        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
+        if (kvm_arch_remove_sw_breakpoint(current_cpu, bp) != 0) {
             /* Try harder to find a CPU that currently sees the breakpoint. */
             for (env = first_cpu; env != NULL; env = env->next_cpu) {
-                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
+                cpu = ENV_GET_CPU(env);
+                if (kvm_arch_remove_sw_breakpoint(cpu, bp) == 0) {
                     break;
                 }
             }
@@ -2014,7 +2026,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
 
 int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
 {
-    return kvm_arch_on_sigbus_vcpu(env, code, addr);
+    CPUState *cpu = ENV_GET_CPU(env);
+    return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
 }
 
 int kvm_on_sigbus(int code, void *addr)
diff --git a/kvm.h b/kvm.h
index 72d866a..61f00b7 100644
--- a/kvm.h
+++ b/kvm.h
@@ -158,14 +158,14 @@ int kvm_vcpu_ioctl(CPUArchState *env, int type, ...);
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
 
-void kvm_arch_pre_run(CPUArchState *env, struct kvm_run *run);
-void kvm_arch_post_run(CPUArchState *env, struct kvm_run *run);
+void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run);
+void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run);
 
-int kvm_arch_handle_exit(CPUArchState *env, struct kvm_run *run);
+int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run);
 
-int kvm_arch_process_async_events(CPUArchState *env);
+int kvm_arch_process_async_events(CPUState *cpu);
 
-int kvm_arch_get_registers(CPUArchState *env);
+int kvm_arch_get_registers(CPUState *cpu);
 
 /* state subset only touched by the VCPU itself during runtime */
 #define KVM_PUT_RUNTIME_STATE   1
@@ -174,15 +174,15 @@ int kvm_arch_get_registers(CPUArchState *env);
 /* full state set, modified during initialization or on vmload */
 #define KVM_PUT_FULL_STATE      3
 
-int kvm_arch_put_registers(CPUArchState *env, int level);
+int kvm_arch_put_registers(CPUState *cpu, int level);
 
 int kvm_arch_init(KVMState *s);
 
-int kvm_arch_init_vcpu(CPUArchState *env);
+int kvm_arch_init_vcpu(CPUState *cpu);
 
-void kvm_arch_reset_vcpu(CPUArchState *env);
+void kvm_arch_reset_vcpu(CPUState *cpu);
 
-int kvm_arch_on_sigbus_vcpu(CPUArchState *env, int code, void *addr);
+int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
 int kvm_arch_on_sigbus(int code, void *addr);
 
 void kvm_arch_init_irq_routing(KVMState *s);
@@ -212,9 +212,9 @@ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
 
 int kvm_sw_breakpoints_active(CPUArchState *env);
 
-int kvm_arch_insert_sw_breakpoint(CPUArchState *current_env,
+int kvm_arch_insert_sw_breakpoint(CPUState *current_cpu,
                                   struct kvm_sw_breakpoint *bp);
-int kvm_arch_remove_sw_breakpoint(CPUArchState *current_env,
+int kvm_arch_remove_sw_breakpoint(CPUState *current_cpu,
                                   struct kvm_sw_breakpoint *bp);
 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                   target_ulong len, int type);
@@ -222,9 +222,9 @@ int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                   target_ulong len, int type);
 void kvm_arch_remove_all_hw_breakpoints(void);
 
-void kvm_arch_update_guest_debug(CPUArchState *env, struct kvm_guest_debug *dbg);
+void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg);
 
-bool kvm_arch_stop_on_emulation_error(CPUArchState *env);
+bool kvm_arch_stop_on_emulation_error(CPUState *cpu);
 
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f669281..80cacf3 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -306,9 +306,10 @@ static void hardware_memory_error(void)
     exit(1);
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUX86State *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 {
-    X86CPU *cpu = x86_env_get_cpu(env);
+    X86CPU *cpu = X86_CPU(c);
+    CPUX86State *env = &cpu->env;
     ram_addr_t ram_addr;
     hwaddr paddr;
 
@@ -406,12 +407,14 @@ static void cpu_update_state(void *opaque, int running, RunState state)
     }
 }
 
-int kvm_arch_init_vcpu(CPUX86State *env)
+int kvm_arch_init_vcpu(CPUState *cs)
 {
     struct {
         struct kvm_cpuid2 cpuid;
         struct kvm_cpuid_entry2 entries[100];
     } QEMU_PACKED cpuid_data;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
     uint32_t limit, i, j, cpuid_i;
     uint32_t unused;
     struct kvm_cpuid_entry2 *c;
@@ -623,9 +626,10 @@ int kvm_arch_init_vcpu(CPUX86State *env)
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUX86State *env)
+void kvm_arch_reset_vcpu(CPUState *cs)
 {
-    X86CPU *cpu = x86_env_get_cpu(env);
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
 
     env->exception_injected = -1;
     env->interrupt_injected = -1;
@@ -1582,9 +1586,10 @@ static int kvm_get_debugregs(CPUX86State *env)
     return 0;
 }
 
-int kvm_arch_put_registers(CPUX86State *env, int level)
+int kvm_arch_put_registers(CPUState *cpu, int level)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
     int ret;
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
@@ -1640,12 +1645,13 @@ int kvm_arch_put_registers(CPUX86State *env, int level)
     return 0;
 }
 
-int kvm_arch_get_registers(CPUX86State *env)
+int kvm_arch_get_registers(CPUState *cs)
 {
-    X86CPU *cpu = x86_env_get_cpu(env);
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
     int ret;
 
-    assert(cpu_is_stopped(CPU(cpu)) || qemu_cpu_is_self(CPU(cpu)));
+    assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));
 
     ret = kvm_getput_regs(env, 0);
     if (ret < 0) {
@@ -1686,8 +1692,10 @@ int kvm_arch_get_registers(CPUX86State *env)
     return 0;
 }
 
-void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 {
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
     int ret;
 
     /* Inject NMI */
@@ -1746,8 +1754,11 @@ void kvm_arch_pre_run(CPUX86State *env, struct kvm_run *run)
     }
 }
 
-void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+
     if (run->if_flag) {
         env->eflags |= IF_MASK;
     } else {
@@ -1757,9 +1768,10 @@ void kvm_arch_post_run(CPUX86State *env, struct kvm_run *run)
     cpu_set_apic_base(env->apic_state, run->apic_base);
 }
 
-int kvm_arch_process_async_events(CPUX86State *env)
+int kvm_arch_process_async_events(CPUState *cs)
 {
-    X86CPU *cpu = x86_env_get_cpu(env);
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
 
     if (env->interrupt_request & CPU_INTERRUPT_MCE) {
         /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */
@@ -1839,8 +1851,9 @@ static int kvm_handle_tpr_access(CPUX86State *env)
     return 1;
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
 {
+    CPUX86State *env = &X86_CPU(cpu)->env;
     static const uint8_t int3 = 0xcc;
 
     if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
@@ -1850,8 +1863,9 @@ int kvm_arch_insert_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp
     return 0;
 }
 
-int kvm_arch_remove_sw_breakpoint(CPUX86State *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
 {
+    CPUX86State *env = &X86_CPU(cpu)->env;
     uint8_t int3;
 
     if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
@@ -1994,8 +2008,9 @@ static int kvm_handle_debug(CPUX86State *env,
     return ret;
 }
 
-void kvm_arch_update_guest_debug(CPUX86State *env, struct kvm_guest_debug *dbg)
+void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
 {
+    CPUX86State *env = &X86_CPU(cpu)->env;
     const uint8_t type_code[] = {
         [GDB_BREAKPOINT_HW] = 0x0,
         [GDB_WATCHPOINT_WRITE] = 0x1,
@@ -2031,9 +2046,10 @@ static bool host_supports_vmx(void)
 
 #define VMX_INVALID_GUEST_STATE 0x80000021
 
-int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
-    X86CPU *cpu = x86_env_get_cpu(env);
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
     uint64_t code;
     int ret;
 
@@ -2083,8 +2099,11 @@ int kvm_arch_handle_exit(CPUX86State *env, struct kvm_run *run)
     return ret;
 }
 
-bool kvm_arch_stop_on_emulation_error(CPUX86State *env)
+bool kvm_arch_stop_on_emulation_error(CPUState *cs)
 {
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
     kvm_cpu_synchronize_state(env);
     return !(env->cr[0] & CR0_PE_MASK) ||
            ((env->segs[R_CS].selector  & 3) != 3);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 3f5df57..8a59b70 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -375,9 +375,10 @@ static inline void kvm_fixup_page_sizes(CPUPPCState *env)
 
 #endif /* !defined (TARGET_PPC64) */
 
-int kvm_arch_init_vcpu(CPUPPCState *cenv)
+int kvm_arch_init_vcpu(CPUState *cs)
 {
-    PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *cenv = &cpu->env;
     int ret;
 
     /* Gather server mmu info from KVM and update the CPU state */
@@ -403,7 +404,7 @@ int kvm_arch_init_vcpu(CPUPPCState *cenv)
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUPPCState *env)
+void kvm_arch_reset_vcpu(CPUState *cpu)
 {
 }
 
@@ -432,8 +433,10 @@ static void kvm_sw_tlb_put(CPUPPCState *env)
     g_free(bitmap);
 }
 
-int kvm_arch_put_registers(CPUPPCState *env, int level)
+int kvm_arch_put_registers(CPUState *cs, int level)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
     int ret;
     int i;
@@ -525,8 +528,10 @@ int kvm_arch_put_registers(CPUPPCState *env, int level)
     return ret;
 }
 
-int kvm_arch_get_registers(CPUPPCState *env)
+int kvm_arch_get_registers(CPUState *cs)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
     struct kvm_sregs sregs;
     uint32_t cr;
@@ -727,8 +732,10 @@ int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 #endif
 
-void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     int r;
     unsigned irq;
 
@@ -760,13 +767,14 @@ void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
      * anyways, so we will get a chance to deliver the rest. */
 }
 
-void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
 }
 
-int kvm_arch_process_async_events(CPUPPCState *env)
+int kvm_arch_process_async_events(CPUState *cs)
 {
-    return env->halted;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    return cpu->env.halted;
 }
 
 static int kvmppc_handle_halt(CPUPPCState *env)
@@ -796,8 +804,10 @@ static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t dat
     return 0;
 }
 
-int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
     int ret;
 
     switch (run->exit_reason) {
@@ -817,7 +827,7 @@ int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 #ifdef CONFIG_PSERIES
     case KVM_EXIT_PAPR_HCALL:
         dprintf("handle PAPR hypercall\n");
-        run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
+        run->papr_hcall.ret = spapr_hypercall(cpu,
                                               run->papr_hcall.nr,
                                               run->papr_hcall.args);
         ret = 0;
@@ -1225,12 +1235,12 @@ int kvmppc_fixup_cpu(CPUPPCState *env)
 }
 
 
-bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
+bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
 {
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
     return 1;
 }
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 94de764..d4e6ab2 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -72,8 +72,9 @@ int kvm_arch_init(KVMState *s)
     return 0;
 }
 
-int kvm_arch_init_vcpu(CPUS390XState *env)
+int kvm_arch_init_vcpu(CPUState *cpu)
 {
+    CPUS390XState *env = &S390_CPU(cpu)->env;
     int ret = 0;
 
     if (kvm_vcpu_ioctl(env, KVM_S390_INITIAL_RESET, NULL) < 0) {
@@ -83,13 +84,15 @@ int kvm_arch_init_vcpu(CPUS390XState *env)
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUS390XState *env)
+void kvm_arch_reset_vcpu(CPUState *cpu)
 {
     /* FIXME: add code to reset vcpu. */
 }
 
-int kvm_arch_put_registers(CPUS390XState *env, int level)
+int kvm_arch_put_registers(CPUState *cs, int level)
 {
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
     struct kvm_sregs sregs;
     struct kvm_regs regs;
     int ret;
@@ -149,8 +152,10 @@ int kvm_arch_put_registers(CPUS390XState *env, int level)
     return 0;
 }
 
-int kvm_arch_get_registers(CPUS390XState *env)
+int kvm_arch_get_registers(CPUState *cs)
 {
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
     struct kvm_sregs sregs;
     struct kvm_regs regs;
     int ret;
@@ -239,8 +244,10 @@ void *kvm_arch_vmalloc(ram_addr_t size)
     }
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
 {
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
     static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};
 
     if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
@@ -250,8 +257,10 @@ int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *
     return 0;
 }
 
-int kvm_arch_remove_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
 {
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
     uint8_t t[4];
     static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};
 
@@ -266,17 +275,18 @@ int kvm_arch_remove_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *
     return 0;
 }
 
-void kvm_arch_pre_run(CPUS390XState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 {
 }
 
-void kvm_arch_post_run(CPUS390XState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 {
 }
 
-int kvm_arch_process_async_events(CPUS390XState *env)
+int kvm_arch_process_async_events(CPUState *cs)
 {
-    return env->halted;
+    S390CPU *cpu = S390_CPU(cs);
+    return cpu->env.halted;
 }
 
 void kvm_s390_interrupt_internal(CPUS390XState *env, int type, uint32_t parm,
@@ -565,8 +575,10 @@ static int handle_intercept(CPUS390XState *env)
     return r;
 }
 
-int kvm_arch_handle_exit(CPUS390XState *env, struct kvm_run *run)
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
+    S390CPU *cpu = S390_CPU(cs);
+    CPUS390XState *env = &cpu->env;
     int ret = 0;
 
     switch (run->exit_reason) {
@@ -587,12 +599,12 @@ int kvm_arch_handle_exit(CPUS390XState *env, struct kvm_run *run)
     return ret;
 }
 
-bool kvm_arch_stop_on_emulation_error(CPUS390XState *env)
+bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
 {
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUS390XState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
     return 1;
 }
commit 8737c51c0444f832c4e97d7eb7540eae457e08e4
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Oct 31 05:29:00 2012 +0100

    cpu: Move kvm_fd into CPUState
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/cpu-defs.h b/cpu-defs.h
index 3669241..6373a80 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -207,7 +207,6 @@ typedef struct CPUWatchpoint {
     const char *cpu_model_str;                                          \
     struct KVMState *kvm_state;                                         \
     struct kvm_run *kvm_run;                                            \
-    int kvm_fd;                                                         \
     int kvm_vcpu_dirty;
 
 #endif
diff --git a/include/qemu/cpu.h b/include/qemu/cpu.h
index 61b7698..6049a20 100644
--- a/include/qemu/cpu.h
+++ b/include/qemu/cpu.h
@@ -57,6 +57,7 @@ typedef struct CPUClass {
  * @created: Indicates whether the CPU thread has been successfully created.
  * @stop: Indicates a pending stop request.
  * @stopped: Indicates the CPU has been artificially stopped.
+ * @kvm_fd: vCPU file descriptor for KVM.
  *
  * State of one CPU core or thread.
  */
@@ -77,6 +78,10 @@ struct CPUState {
     bool stop;
     bool stopped;
 
+#if !defined(CONFIG_USER_ONLY)
+    int kvm_fd;
+#endif
+
     /* TODO Move common fields from CPUArchState here. */
 };
 
diff --git a/kvm-all.c b/kvm-all.c
index 8e9a8d8..8a00df7 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -216,6 +216,7 @@ static void kvm_reset_vcpu(void *opaque)
 
 int kvm_init_vcpu(CPUArchState *env)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     KVMState *s = kvm_state;
     long mmap_size;
     int ret;
@@ -228,7 +229,7 @@ int kvm_init_vcpu(CPUArchState *env)
         goto err;
     }
 
-    env->kvm_fd = ret;
+    cpu->kvm_fd = ret;
     env->kvm_state = s;
     env->kvm_vcpu_dirty = 1;
 
@@ -240,7 +241,7 @@ int kvm_init_vcpu(CPUArchState *env)
     }
 
     env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
-                        env->kvm_fd, 0);
+                        cpu->kvm_fd, 0);
     if (env->kvm_run == MAP_FAILED) {
         ret = -errno;
         DPRINTF("mmap'ing vcpu state failed\n");
@@ -1652,6 +1653,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
 
 int kvm_vcpu_ioctl(CPUArchState *env, int type, ...)
 {
+    CPUState *cpu = ENV_GET_CPU(env);
     int ret;
     void *arg;
     va_list ap;
@@ -1660,7 +1662,7 @@ int kvm_vcpu_ioctl(CPUArchState *env, int type, ...)
     arg = va_arg(ap, void *);
     va_end(ap);
 
-    ret = ioctl(env->kvm_fd, type, arg);
+    ret = ioctl(cpu->kvm_fd, type, arg);
     if (ret == -1) {
         ret = -errno;
     }
commit a404b61244ff555ace0a1360fc22275fbeda503e
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Wed Dec 5 14:49:11 2012 -0200

    qdev-properties.c: Separate core from the code used only by qemu-system-*
    
    This separates the qdev properties code in two parts:
     - qdev-properties.c, that contains most of the qdev properties code;
     - qdev-properties-system.c for code specific for qemu-system-*,
       containing:
       - Property types: drive, chr, netdev, vlan, that depend on code that
         won't be included on *-user
       - qemu_add_globals(), that depends on qemu-config.o.
    
    This change should help on two things:
     - Allowing DeviceState to be used by *-user without pulling
       dependencies that are specific for qemu-system-*;
     - Writing qdev unit tests without pulling too many dependencies.
    
    The copyright/license of qdev-properties.c isn't explicitly stated at
    the file, so add a simple copyright/license header pointing to the
    commit ID of the original file.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index d581d8d..96a8365 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -185,6 +185,7 @@ common-obj-y += bt.o bt-l2cap.o bt-sdp.o bt-hci.o bt-hid.o
 common-obj-y += bt-hci-csr.o
 common-obj-y += msmouse.o ps2.o
 common-obj-y += qdev.o qdev-properties.o qdev-monitor.o
+common-obj-y += qdev-properties-system.o
 common-obj-$(CONFIG_BRLAPI) += baum.o
 
 # xen backend driver support
diff --git a/hw/qdev-properties-system.c b/hw/qdev-properties-system.c
new file mode 100644
index 0000000..86b4cf6
--- /dev/null
+++ b/hw/qdev-properties-system.c
@@ -0,0 +1,357 @@
+/*
+ * qdev property parsing and global properties
+ * (parts specific for qemu-system-*)
+ *
+ * This file is based on code from hw/qdev-properties.c from
+ * commit 074a86fccd185616469dfcdc0e157f438aebba18,
+ * Copyright (c) Gerd Hoffmann <kraxel at redhat.com> and other contributors.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "net.h"
+#include "qdev.h"
+#include "qerror.h"
+#include "blockdev.h"
+#include "hw/block-common.h"
+#include "net/hub.h"
+#include "qapi/qapi-visit-core.h"
+
+static void get_pointer(Object *obj, Visitor *v, Property *prop,
+                        const char *(*print)(void *ptr),
+                        const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    void **ptr = qdev_get_prop_ptr(dev, prop);
+    char *p;
+
+    p = (char *) (*ptr ? print(*ptr) : "");
+    visit_type_str(v, &p, name, errp);
+}
+
+static void set_pointer(Object *obj, Visitor *v, Property *prop,
+                        int (*parse)(DeviceState *dev, const char *str,
+                                     void **ptr),
+                        const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Error *local_err = NULL;
+    void **ptr = qdev_get_prop_ptr(dev, prop);
+    char *str;
+    int ret;
+
+    if (dev->state != DEV_STATE_CREATED) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    visit_type_str(v, &str, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    if (!*str) {
+        g_free(str);
+        *ptr = NULL;
+        return;
+    }
+    ret = parse(dev, str, ptr);
+    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
+    g_free(str);
+}
+
+/* --- drive --- */
+
+static int parse_drive(DeviceState *dev, const char *str, void **ptr)
+{
+    BlockDriverState *bs;
+
+    bs = bdrv_find(str);
+    if (bs == NULL) {
+        return -ENOENT;
+    }
+    if (bdrv_attach_dev(bs, dev) < 0) {
+        return -EEXIST;
+    }
+    *ptr = bs;
+    return 0;
+}
+
+static void release_drive(Object *obj, const char *name, void *opaque)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    BlockDriverState **ptr = qdev_get_prop_ptr(dev, prop);
+
+    if (*ptr) {
+        bdrv_detach_dev(*ptr, dev);
+        blockdev_auto_del(*ptr);
+    }
+}
+
+static const char *print_drive(void *ptr)
+{
+    return bdrv_get_device_name(ptr);
+}
+
+static void get_drive(Object *obj, Visitor *v, void *opaque,
+                      const char *name, Error **errp)
+{
+    get_pointer(obj, v, opaque, print_drive, name, errp);
+}
+
+static void set_drive(Object *obj, Visitor *v, void *opaque,
+                      const char *name, Error **errp)
+{
+    set_pointer(obj, v, opaque, parse_drive, name, errp);
+}
+
+PropertyInfo qdev_prop_drive = {
+    .name  = "drive",
+    .get   = get_drive,
+    .set   = set_drive,
+    .release = release_drive,
+};
+
+/* --- character device --- */
+
+static int parse_chr(DeviceState *dev, const char *str, void **ptr)
+{
+    CharDriverState *chr = qemu_chr_find(str);
+    if (chr == NULL) {
+        return -ENOENT;
+    }
+    if (chr->avail_connections < 1) {
+        return -EEXIST;
+    }
+    *ptr = chr;
+    --chr->avail_connections;
+    return 0;
+}
+
+static void release_chr(Object *obj, const char *name, void *opaque)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    CharDriverState **ptr = qdev_get_prop_ptr(dev, prop);
+
+    if (*ptr) {
+        qemu_chr_add_handlers(*ptr, NULL, NULL, NULL, NULL);
+    }
+}
+
+
+static const char *print_chr(void *ptr)
+{
+    CharDriverState *chr = ptr;
+
+    return chr->label ? chr->label : "";
+}
+
+static void get_chr(Object *obj, Visitor *v, void *opaque,
+                    const char *name, Error **errp)
+{
+    get_pointer(obj, v, opaque, print_chr, name, errp);
+}
+
+static void set_chr(Object *obj, Visitor *v, void *opaque,
+                    const char *name, Error **errp)
+{
+    set_pointer(obj, v, opaque, parse_chr, name, errp);
+}
+
+PropertyInfo qdev_prop_chr = {
+    .name  = "chr",
+    .get   = get_chr,
+    .set   = set_chr,
+    .release = release_chr,
+};
+
+/* --- netdev device --- */
+
+static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
+{
+    NetClientState *netdev = qemu_find_netdev(str);
+
+    if (netdev == NULL) {
+        return -ENOENT;
+    }
+    if (netdev->peer) {
+        return -EEXIST;
+    }
+    *ptr = netdev;
+    return 0;
+}
+
+static const char *print_netdev(void *ptr)
+{
+    NetClientState *netdev = ptr;
+
+    return netdev->name ? netdev->name : "";
+}
+
+static void get_netdev(Object *obj, Visitor *v, void *opaque,
+                       const char *name, Error **errp)
+{
+    get_pointer(obj, v, opaque, print_netdev, name, errp);
+}
+
+static void set_netdev(Object *obj, Visitor *v, void *opaque,
+                       const char *name, Error **errp)
+{
+    set_pointer(obj, v, opaque, parse_netdev, name, errp);
+}
+
+PropertyInfo qdev_prop_netdev = {
+    .name  = "netdev",
+    .get   = get_netdev,
+    .set   = set_netdev,
+};
+
+/* --- vlan --- */
+
+static int print_vlan(DeviceState *dev, Property *prop, char *dest, size_t len)
+{
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
+
+    if (*ptr) {
+        int id;
+        if (!net_hub_id_for_client(*ptr, &id)) {
+            return snprintf(dest, len, "%d", id);
+        }
+    }
+
+    return snprintf(dest, len, "<null>");
+}
+
+static void get_vlan(Object *obj, Visitor *v, void *opaque,
+                     const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
+    int32_t id = -1;
+
+    if (*ptr) {
+        int hub_id;
+        if (!net_hub_id_for_client(*ptr, &hub_id)) {
+            id = hub_id;
+        }
+    }
+
+    visit_type_int32(v, &id, name, errp);
+}
+
+static void set_vlan(Object *obj, Visitor *v, void *opaque,
+                     const char *name, Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
+    Error *local_err = NULL;
+    int32_t id;
+    NetClientState *hubport;
+
+    if (dev->state != DEV_STATE_CREATED) {
+        error_set(errp, QERR_PERMISSION_DENIED);
+        return;
+    }
+
+    visit_type_int32(v, &id, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    if (id == -1) {
+        *ptr = NULL;
+        return;
+    }
+
+    hubport = net_hub_port_find(id);
+    if (!hubport) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+                  name, prop->info->name);
+        return;
+    }
+    *ptr = hubport;
+}
+
+PropertyInfo qdev_prop_vlan = {
+    .name  = "vlan",
+    .print = print_vlan,
+    .get   = get_vlan,
+    .set   = set_vlan,
+};
+
+int qdev_prop_set_drive(DeviceState *dev, const char *name,
+                        BlockDriverState *value)
+{
+    Error *errp = NULL;
+    const char *bdrv_name = value ? bdrv_get_device_name(value) : "";
+    object_property_set_str(OBJECT(dev), bdrv_name,
+                            name, &errp);
+    if (errp) {
+        qerror_report_err(errp);
+        error_free(errp);
+        return -1;
+    }
+    return 0;
+}
+
+void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name,
+                                BlockDriverState *value)
+{
+    if (qdev_prop_set_drive(dev, name, value) < 0) {
+        exit(1);
+    }
+}
+void qdev_prop_set_chr(DeviceState *dev, const char *name,
+                       CharDriverState *value)
+{
+    Error *errp = NULL;
+    assert(!value || value->label);
+    object_property_set_str(OBJECT(dev),
+                            value ? value->label : "", name, &errp);
+    assert_no_error(errp);
+}
+
+void qdev_prop_set_netdev(DeviceState *dev, const char *name,
+                          NetClientState *value)
+{
+    Error *errp = NULL;
+    assert(!value || value->name);
+    object_property_set_str(OBJECT(dev),
+                            value ? value->name : "", name, &errp);
+    assert_no_error(errp);
+}
+
+void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
+{
+    qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
+    if (nd->netdev) {
+        qdev_prop_set_netdev(dev, "netdev", nd->netdev);
+    }
+    if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
+        object_property_find(OBJECT(dev), "vectors", NULL)) {
+        qdev_prop_set_uint32(dev, "vectors", nd->nvectors);
+    }
+    nd->instantiated = 1;
+}
+
+static int qdev_add_one_global(QemuOpts *opts, void *opaque)
+{
+    GlobalProperty *g;
+
+    g = g_malloc0(sizeof(*g));
+    g->driver   = qemu_opt_get(opts, "driver");
+    g->property = qemu_opt_get(opts, "property");
+    g->value    = qemu_opt_get(opts, "value");
+    qdev_prop_register_global(g);
+    return 0;
+}
+
+void qemu_add_globals(void)
+{
+    qemu_opts_foreach(qemu_find_opts("global"), qdev_add_one_global, NULL, 0);
+}
diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 67543fd..bbab2a9 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -13,49 +13,6 @@ void *qdev_get_prop_ptr(DeviceState *dev, Property *prop)
     return ptr;
 }
 
-static void get_pointer(Object *obj, Visitor *v, Property *prop,
-                        const char *(*print)(void *ptr),
-                        const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    void **ptr = qdev_get_prop_ptr(dev, prop);
-    char *p;
-
-    p = (char *) (*ptr ? print(*ptr) : "");
-    visit_type_str(v, &p, name, errp);
-}
-
-static void set_pointer(Object *obj, Visitor *v, Property *prop,
-                        int (*parse)(DeviceState *dev, const char *str,
-                                     void **ptr),
-                        const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Error *local_err = NULL;
-    void **ptr = qdev_get_prop_ptr(dev, prop);
-    char *str;
-    int ret;
-
-    if (dev->state != DEV_STATE_CREATED) {
-        error_set(errp, QERR_PERMISSION_DENIED);
-        return;
-    }
-
-    visit_type_str(v, &str, name, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-    if (!*str) {
-        g_free(str);
-        *ptr = NULL;
-        return;
-    }
-    ret = parse(dev, str, ptr);
-    error_set_from_qdev_prop_error(errp, ret, dev, prop, str);
-    g_free(str);
-}
-
 static void get_enum(Object *obj, Visitor *v, void *opaque,
                      const char *name, Error **errp)
 {
@@ -479,229 +436,6 @@ PropertyInfo qdev_prop_string = {
     .set   = set_string,
 };
 
-/* --- drive --- */
-
-static int parse_drive(DeviceState *dev, const char *str, void **ptr)
-{
-    BlockDriverState *bs;
-
-    bs = bdrv_find(str);
-    if (bs == NULL) {
-        return -ENOENT;
-    }
-    if (bdrv_attach_dev(bs, dev) < 0) {
-        return -EEXIST;
-    }
-    *ptr = bs;
-    return 0;
-}
-
-static void release_drive(Object *obj, const char *name, void *opaque)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    BlockDriverState **ptr = qdev_get_prop_ptr(dev, prop);
-
-    if (*ptr) {
-        bdrv_detach_dev(*ptr, dev);
-        blockdev_auto_del(*ptr);
-    }
-}
-
-static const char *print_drive(void *ptr)
-{
-    return bdrv_get_device_name(ptr);
-}
-
-static void get_drive(Object *obj, Visitor *v, void *opaque,
-                      const char *name, Error **errp)
-{
-    get_pointer(obj, v, opaque, print_drive, name, errp);
-}
-
-static void set_drive(Object *obj, Visitor *v, void *opaque,
-                      const char *name, Error **errp)
-{
-    set_pointer(obj, v, opaque, parse_drive, name, errp);
-}
-
-PropertyInfo qdev_prop_drive = {
-    .name  = "drive",
-    .get   = get_drive,
-    .set   = set_drive,
-    .release = release_drive,
-};
-
-/* --- character device --- */
-
-static int parse_chr(DeviceState *dev, const char *str, void **ptr)
-{
-    CharDriverState *chr = qemu_chr_find(str);
-    if (chr == NULL) {
-        return -ENOENT;
-    }
-    if (chr->avail_connections < 1) {
-        return -EEXIST;
-    }
-    *ptr = chr;
-    --chr->avail_connections;
-    return 0;
-}
-
-static void release_chr(Object *obj, const char *name, void *opaque)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    CharDriverState **ptr = qdev_get_prop_ptr(dev, prop);
-
-    if (*ptr) {
-        qemu_chr_add_handlers(*ptr, NULL, NULL, NULL, NULL);
-    }
-}
-
-
-static const char *print_chr(void *ptr)
-{
-    CharDriverState *chr = ptr;
-
-    return chr->label ? chr->label : "";
-}
-
-static void get_chr(Object *obj, Visitor *v, void *opaque,
-                    const char *name, Error **errp)
-{
-    get_pointer(obj, v, opaque, print_chr, name, errp);
-}
-
-static void set_chr(Object *obj, Visitor *v, void *opaque,
-                    const char *name, Error **errp)
-{
-    set_pointer(obj, v, opaque, parse_chr, name, errp);
-}
-
-PropertyInfo qdev_prop_chr = {
-    .name  = "chr",
-    .get   = get_chr,
-    .set   = set_chr,
-    .release = release_chr,
-};
-
-/* --- netdev device --- */
-
-static int parse_netdev(DeviceState *dev, const char *str, void **ptr)
-{
-    NetClientState *netdev = qemu_find_netdev(str);
-
-    if (netdev == NULL) {
-        return -ENOENT;
-    }
-    if (netdev->peer) {
-        return -EEXIST;
-    }
-    *ptr = netdev;
-    return 0;
-}
-
-static const char *print_netdev(void *ptr)
-{
-    NetClientState *netdev = ptr;
-
-    return netdev->name ? netdev->name : "";
-}
-
-static void get_netdev(Object *obj, Visitor *v, void *opaque,
-                       const char *name, Error **errp)
-{
-    get_pointer(obj, v, opaque, print_netdev, name, errp);
-}
-
-static void set_netdev(Object *obj, Visitor *v, void *opaque,
-                       const char *name, Error **errp)
-{
-    set_pointer(obj, v, opaque, parse_netdev, name, errp);
-}
-
-PropertyInfo qdev_prop_netdev = {
-    .name  = "netdev",
-    .get   = get_netdev,
-    .set   = set_netdev,
-};
-
-/* --- vlan --- */
-
-static int print_vlan(DeviceState *dev, Property *prop, char *dest, size_t len)
-{
-    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
-
-    if (*ptr) {
-        int id;
-        if (!net_hub_id_for_client(*ptr, &id)) {
-            return snprintf(dest, len, "%d", id);
-        }
-    }
-
-    return snprintf(dest, len, "<null>");
-}
-
-static void get_vlan(Object *obj, Visitor *v, void *opaque,
-                     const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
-    int32_t id = -1;
-
-    if (*ptr) {
-        int hub_id;
-        if (!net_hub_id_for_client(*ptr, &hub_id)) {
-            id = hub_id;
-        }
-    }
-
-    visit_type_int32(v, &id, name, errp);
-}
-
-static void set_vlan(Object *obj, Visitor *v, void *opaque,
-                     const char *name, Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    NetClientState **ptr = qdev_get_prop_ptr(dev, prop);
-    Error *local_err = NULL;
-    int32_t id;
-    NetClientState *hubport;
-
-    if (dev->state != DEV_STATE_CREATED) {
-        error_set(errp, QERR_PERMISSION_DENIED);
-        return;
-    }
-
-    visit_type_int32(v, &id, name, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-    if (id == -1) {
-        *ptr = NULL;
-        return;
-    }
-
-    hubport = net_hub_port_find(id);
-    if (!hubport) {
-        error_set(errp, QERR_INVALID_PARAMETER_VALUE,
-                  name, prop->info->name);
-        return;
-    }
-    *ptr = hubport;
-}
-
-PropertyInfo qdev_prop_vlan = {
-    .name  = "vlan",
-    .print = print_vlan,
-    .get   = get_vlan,
-    .set   = set_vlan,
-};
-
 /* --- pointer --- */
 
 /* Not a proper property, just for dirty hacks.  TODO Remove it!  */
@@ -1170,48 +904,6 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
     assert_no_error(errp);
 }
 
-int qdev_prop_set_drive(DeviceState *dev, const char *name,
-                        BlockDriverState *value)
-{
-    Error *errp = NULL;
-    const char *bdrv_name = value ? bdrv_get_device_name(value) : "";
-    object_property_set_str(OBJECT(dev), bdrv_name,
-                            name, &errp);
-    if (errp) {
-        qerror_report_err(errp);
-        error_free(errp);
-        return -1;
-    }
-    return 0;
-}
-
-void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name,
-                                BlockDriverState *value)
-{
-    if (qdev_prop_set_drive(dev, name, value) < 0) {
-        exit(1);
-    }
-}
-void qdev_prop_set_chr(DeviceState *dev, const char *name,
-                       CharDriverState *value)
-{
-    Error *errp = NULL;
-    assert(!value || value->label);
-    object_property_set_str(OBJECT(dev),
-                            value ? value->label : "", name, &errp);
-    assert_no_error(errp);
-}
-
-void qdev_prop_set_netdev(DeviceState *dev, const char *name,
-                          NetClientState *value)
-{
-    Error *errp = NULL;
-    assert(!value || value->name);
-    object_property_set_str(OBJECT(dev),
-                            value ? value->name : "", name, &errp);
-    assert_no_error(errp);
-}
-
 void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value)
 {
     Error *errp = NULL;
@@ -1248,7 +940,7 @@ void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value)
 static QTAILQ_HEAD(, GlobalProperty) global_props =
         QTAILQ_HEAD_INITIALIZER(global_props);
 
-static void qdev_prop_register_global(GlobalProperty *prop)
+void qdev_prop_register_global(GlobalProperty *prop)
 {
     QTAILQ_INSERT_TAIL(&global_props, prop, next);
 }
@@ -1279,20 +971,3 @@ void qdev_prop_set_globals(DeviceState *dev)
         class = object_class_get_parent(class);
     } while (class);
 }
-
-static int qdev_add_one_global(QemuOpts *opts, void *opaque)
-{
-    GlobalProperty *g;
-
-    g = g_malloc0(sizeof(*g));
-    g->driver   = qemu_opt_get(opts, "driver");
-    g->property = qemu_opt_get(opts, "property");
-    g->value    = qemu_opt_get(opts, "value");
-    qdev_prop_register_global(g);
-    return 0;
-}
-
-void qemu_add_globals(void)
-{
-    qemu_opts_foreach(qemu_find_opts("global"), qdev_add_one_global, NULL, 0);
-}
diff --git a/hw/qdev-properties.h b/hw/qdev-properties.h
index 5b046ab..ddcf774 100644
--- a/hw/qdev-properties.h
+++ b/hw/qdev-properties.h
@@ -116,6 +116,7 @@ void qdev_prop_set_enum(DeviceState *dev, const char *name, int value);
 /* FIXME: Remove opaque pointer properties.  */
 void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value);
 
+void qdev_prop_register_global(GlobalProperty *prop);
 void qdev_prop_register_global_list(GlobalProperty *props);
 void qdev_prop_set_globals(DeviceState *dev);
 void error_set_from_qdev_prop_error(Error **errp, int ret, DeviceState *dev,
diff --git a/hw/qdev.c b/hw/qdev.c
index 0f8b878..fa0af21 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -25,7 +25,6 @@
    inherit from a particular bus (e.g. PCI or I2C) rather than
    this API directly.  */
 
-#include "net.h"
 #include "qdev.h"
 #include "sysemu.h"
 #include "error.h"
@@ -312,19 +311,6 @@ void qdev_connect_gpio_out(DeviceState * dev, int n, qemu_irq pin)
     dev->gpio_out[n] = pin;
 }
 
-void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
-{
-    qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
-    if (nd->netdev) {
-        qdev_prop_set_netdev(dev, "netdev", nd->netdev);
-    }
-    if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
-        object_property_find(OBJECT(dev), "vectors", NULL)) {
-        qdev_prop_set_uint32(dev, "vectors", nd->nvectors);
-    }
-    nd->instantiated = 1;
-}
-
 BusState *qdev_get_child_bus(DeviceState *dev, const char *name)
 {
     BusState *bus;
commit 04a2d61e494532260214736ebb0f975822771643
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Wed Dec 5 14:49:10 2012 -0200

    qdev: Coding style fixes
    
    Add missing braces and break lines larger than 80 chars.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c
index 81d901c..67543fd 100644
--- a/hw/qdev-properties.c
+++ b/hw/qdev-properties.c
@@ -95,10 +95,11 @@ static void bit_prop_set(DeviceState *dev, Property *props, bool val)
 {
     uint32_t *p = qdev_get_prop_ptr(dev, props);
     uint32_t mask = qdev_get_prop_mask(props);
-    if (val)
+    if (val) {
         *p |= mask;
-    else
+    } else {
         *p &= ~mask;
+    }
 }
 
 static int print_bit(DeviceState *dev, Property *prop, char *dest, size_t len)
@@ -420,11 +421,13 @@ static void release_string(Object *obj, const char *name, void *opaque)
     g_free(*(char **)qdev_get_prop_ptr(DEVICE(obj), prop));
 }
 
-static int print_string(DeviceState *dev, Property *prop, char *dest, size_t len)
+static int print_string(DeviceState *dev, Property *prop, char *dest,
+                        size_t len)
 {
     char **ptr = qdev_get_prop_ptr(dev, prop);
-    if (!*ptr)
+    if (!*ptr) {
         return snprintf(dest, len, "<null>");
+    }
     return snprintf(dest, len, "\"%s\"", *ptr);
 }
 
@@ -483,10 +486,12 @@ static int parse_drive(DeviceState *dev, const char *str, void **ptr)
     BlockDriverState *bs;
 
     bs = bdrv_find(str);
-    if (bs == NULL)
+    if (bs == NULL) {
         return -ENOENT;
-    if (bdrv_attach_dev(bs, dev) < 0)
+    }
+    if (bdrv_attach_dev(bs, dev) < 0) {
         return -EEXIST;
+    }
     *ptr = bs;
     return 0;
 }
@@ -749,16 +754,20 @@ static void set_mac(Object *obj, Visitor *v, void *opaque,
     }
 
     for (i = 0, pos = 0; i < 6; i++, pos += 3) {
-        if (!qemu_isxdigit(str[pos]))
+        if (!qemu_isxdigit(str[pos])) {
             goto inval;
-        if (!qemu_isxdigit(str[pos+1]))
+        }
+        if (!qemu_isxdigit(str[pos+1])) {
             goto inval;
+        }
         if (i == 5) {
-            if (str[pos+2] != '\0')
+            if (str[pos+2] != '\0') {
                 goto inval;
+            }
         } else {
-            if (str[pos+2] != ':' && str[pos+2] != '-')
+            if (str[pos+2] != ':' && str[pos+2] != '-') {
                 goto inval;
+            }
         }
         mac->a[i] = strtol(str+pos, &p, 16);
     }
@@ -864,7 +873,8 @@ invalid:
     g_free(str);
 }
 
-static int print_pci_devfn(DeviceState *dev, Property *prop, char *dest, size_t len)
+static int print_pci_devfn(DeviceState *dev, Property *prop, char *dest,
+                           size_t len)
 {
     int32_t *ptr = qdev_get_prop_ptr(dev, prop);
 
@@ -1038,11 +1048,13 @@ PropertyInfo qdev_prop_pci_host_devaddr = {
 
 static Property *qdev_prop_walk(Property *props, const char *name)
 {
-    if (!props)
+    if (!props) {
         return NULL;
+    }
     while (props->name) {
-        if (strcmp(props->name, name) == 0)
+        if (strcmp(props->name, name) == 0) {
             return props;
+        }
         props++;
     }
     return NULL;
@@ -1158,7 +1170,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
     assert_no_error(errp);
 }
 
-int qdev_prop_set_drive(DeviceState *dev, const char *name, BlockDriverState *value)
+int qdev_prop_set_drive(DeviceState *dev, const char *name,
+                        BlockDriverState *value)
 {
     Error *errp = NULL;
     const char *bdrv_name = value ? bdrv_get_device_name(value) : "";
@@ -1172,13 +1185,15 @@ int qdev_prop_set_drive(DeviceState *dev, const char *name, BlockDriverState *va
     return 0;
 }
 
-void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name, BlockDriverState *value)
+void qdev_prop_set_drive_nofail(DeviceState *dev, const char *name,
+                                BlockDriverState *value)
 {
     if (qdev_prop_set_drive(dev, name, value) < 0) {
         exit(1);
     }
 }
-void qdev_prop_set_chr(DeviceState *dev, const char *name, CharDriverState *value)
+void qdev_prop_set_chr(DeviceState *dev, const char *name,
+                       CharDriverState *value)
 {
     Error *errp = NULL;
     assert(!value || value->label);
@@ -1187,7 +1202,8 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, CharDriverState *valu
     assert_no_error(errp);
 }
 
-void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value)
+void qdev_prop_set_netdev(DeviceState *dev, const char *name,
+                          NetClientState *value)
 {
     Error *errp = NULL;
     assert(!value || value->name);
@@ -1229,7 +1245,8 @@ void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value)
     *ptr = value;
 }
 
-static QTAILQ_HEAD(, GlobalProperty) global_props = QTAILQ_HEAD_INITIALIZER(global_props);
+static QTAILQ_HEAD(, GlobalProperty) global_props =
+        QTAILQ_HEAD_INITIALIZER(global_props);
 
 static void qdev_prop_register_global(GlobalProperty *prop)
 {
diff --git a/hw/qdev.c b/hw/qdev.c
index 599382c..0f8b878 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -315,8 +315,9 @@ void qdev_connect_gpio_out(DeviceState * dev, int n, qemu_irq pin)
 void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
 {
     qdev_prop_set_macaddr(dev, "mac", nd->macaddr.a);
-    if (nd->netdev)
+    if (nd->netdev) {
         qdev_prop_set_netdev(dev, "netdev", nd->netdev);
+    }
     if (nd->nvectors != DEV_NVECTORS_UNSPECIFIED &&
         object_property_find(OBJECT(dev), "vectors", NULL)) {
         qdev_prop_set_uint32(dev, "vectors", nd->nvectors);
commit 92a3136174f60ee45b113296cb2c2a5225b00369
Author: Andreas Färber <afaerber at suse.de>
Date:   Sun Dec 16 02:17:02 2012 +0100

    cpu: Introduce CPUListState struct
    
    This generalizes {ARM,M68k,Alpha}CPUListState to avoid declaring it for
    each target. Place it in cpu-common.h to avoid circular dependencies.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Reviewed-by: Igor Mammedov <imammedo at redhat.com>
    Reviewed-by: Eduardo Habkost <ehabkost at redhat.com>

diff --git a/cpu-common.h b/cpu-common.h
index d2fbafa..a62b6ea 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -12,6 +12,18 @@
 #include "bswap.h"
 #include "qemu-queue.h"
 
+/**
+ * CPUListState:
+ * @cpu_fprintf: Print function.
+ * @file: File to print to using @cpu_fprint.
+ *
+ * State commonly used for iterating over CPU models.
+ */
+typedef struct CPUListState {
+    fprintf_function cpu_fprintf;
+    FILE *file;
+} CPUListState;
+
 #if !defined(CONFIG_USER_ONLY)
 
 enum device_endian {
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 2deb3c1..59d8669 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -33,11 +33,6 @@ static void alpha_cpu_realize(Object *obj, Error **errp)
 #endif
 }
 
-typedef struct AlphaCPUListState {
-    fprintf_function cpu_fprintf;
-    FILE *file;
-} AlphaCPUListState;
-
 /* Sort alphabetically by type name. */
 static gint alpha_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
@@ -53,7 +48,7 @@ static gint alpha_cpu_list_compare(gconstpointer a, gconstpointer b)
 static void alpha_cpu_list_entry(gpointer data, gpointer user_data)
 {
     ObjectClass *oc = data;
-    AlphaCPUListState *s = user_data;
+    CPUListState *s = user_data;
 
     (*s->cpu_fprintf)(s->file, "  %s\n",
                       object_class_get_name(oc));
@@ -61,7 +56,7 @@ static void alpha_cpu_list_entry(gpointer data, gpointer user_data)
 
 void alpha_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
-    AlphaCPUListState s = {
+    CPUListState s = {
         .file = f,
         .cpu_fprintf = cpu_fprintf,
     };
diff --git a/target-arm/helper.c b/target-arm/helper.c
index ab8b734..d2f2fb4 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1291,11 +1291,6 @@ ARMCPU *cpu_arm_init(const char *cpu_model)
     return cpu;
 }
 
-typedef struct ARMCPUListState {
-    fprintf_function cpu_fprintf;
-    FILE *file;
-} ARMCPUListState;
-
 /* Sort alphabetically by type name, except for "any". */
 static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
@@ -1317,7 +1312,7 @@ static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b)
 static void arm_cpu_list_entry(gpointer data, gpointer user_data)
 {
     ObjectClass *oc = data;
-    ARMCPUListState *s = user_data;
+    CPUListState *s = user_data;
 
     (*s->cpu_fprintf)(s->file, "  %s\n",
                       object_class_get_name(oc));
@@ -1325,7 +1320,7 @@ static void arm_cpu_list_entry(gpointer data, gpointer user_data)
 
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
-    ARMCPUListState s = {
+    CPUListState s = {
         .file = f,
         .cpu_fprintf = cpu_fprintf,
     };
diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index a5d0100..875a71a 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -25,11 +25,6 @@
 
 #define SIGNBIT (1u << 31)
 
-typedef struct M68kCPUListState {
-    fprintf_function cpu_fprintf;
-    FILE *file;
-} M68kCPUListState;
-
 /* Sort alphabetically, except for "any". */
 static gint m68k_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
@@ -51,7 +46,7 @@ static gint m68k_cpu_list_compare(gconstpointer a, gconstpointer b)
 static void m68k_cpu_list_entry(gpointer data, gpointer user_data)
 {
     ObjectClass *c = data;
-    M68kCPUListState *s = user_data;
+    CPUListState *s = user_data;
 
     (*s->cpu_fprintf)(s->file, "%s\n",
                       object_class_get_name(c));
@@ -59,7 +54,7 @@ static void m68k_cpu_list_entry(gpointer data, gpointer user_data)
 
 void m68k_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
-    M68kCPUListState s = {
+    CPUListState s = {
         .file = f,
         .cpu_fprintf = cpu_fprintf,
     };
commit 494342b35b55b3b126821141e15c8a49df122ff1
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Oct 15 17:44:21 2012 +0200

    target-alpha: Add support for -cpu ?
    
    Implement alphabetical listing of CPU subclasses.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index a5a98d0..2deb3c1 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -33,6 +33,47 @@ static void alpha_cpu_realize(Object *obj, Error **errp)
 #endif
 }
 
+typedef struct AlphaCPUListState {
+    fprintf_function cpu_fprintf;
+    FILE *file;
+} AlphaCPUListState;
+
+/* Sort alphabetically by type name. */
+static gint alpha_cpu_list_compare(gconstpointer a, gconstpointer b)
+{
+    ObjectClass *class_a = (ObjectClass *)a;
+    ObjectClass *class_b = (ObjectClass *)b;
+    const char *name_a, *name_b;
+
+    name_a = object_class_get_name(class_a);
+    name_b = object_class_get_name(class_b);
+    return strcmp(name_a, name_b);
+}
+
+static void alpha_cpu_list_entry(gpointer data, gpointer user_data)
+{
+    ObjectClass *oc = data;
+    AlphaCPUListState *s = user_data;
+
+    (*s->cpu_fprintf)(s->file, "  %s\n",
+                      object_class_get_name(oc));
+}
+
+void alpha_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+{
+    AlphaCPUListState s = {
+        .file = f,
+        .cpu_fprintf = cpu_fprintf,
+    };
+    GSList *list;
+
+    list = object_class_get_list(TYPE_ALPHA_CPU, false);
+    list = g_slist_sort(list, alpha_cpu_list_compare);
+    (*cpu_fprintf)(f, "Available CPUs:\n");
+    g_slist_foreach(list, alpha_cpu_list_entry, &s);
+    g_slist_free(list);
+}
+
 /* Models */
 
 #define TYPE(model) model "-" TYPE_ALPHA_CPU
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 0d08458..23f06c5 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -289,6 +289,7 @@ struct CPUAlphaState {
     int implver;
 };
 
+#define cpu_list alpha_cpu_list
 #define cpu_exec cpu_alpha_exec
 #define cpu_gen_code cpu_alpha_gen_code
 #define cpu_signal_handler cpu_alpha_signal_handler
@@ -438,6 +439,7 @@ static inline CPUAlphaState *cpu_init(const char *cpu_model)
     return &cpu->env;
 }
 
+void alpha_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 int cpu_alpha_exec(CPUAlphaState *s);
 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
commit 0c28246fcd5ea9ccb22aa93ef2e0af14463fec58
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Oct 15 17:33:32 2012 +0200

    target-alpha: Turn CPU definitions into subclasses
    
    Make TYPE_ALPHA_CPU abstract and add types <name>-alpha-cpu.
    Use type inheritance, and turn "2*" models into aliases.
    
    Move cpu_alpha_init() to cpu.c and split out CPU realization.
    Default to creating type "ev67-alpha-cpu" as before.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 11a19eb..a5a98d0 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -21,8 +21,175 @@
 
 #include "cpu.h"
 #include "qemu-common.h"
+#include "error.h"
 
 
+static void alpha_cpu_realize(Object *obj, Error **errp)
+{
+#ifndef CONFIG_USER_ONLY
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+
+    qemu_init_vcpu(&cpu->env);
+#endif
+}
+
+/* Models */
+
+#define TYPE(model) model "-" TYPE_ALPHA_CPU
+
+typedef struct AlphaCPUAlias {
+    const char *alias;
+    const char *typename;
+} AlphaCPUAlias;
+
+static const AlphaCPUAlias alpha_cpu_aliases[] = {
+    { "21064",   TYPE("ev4") },
+    { "21164",   TYPE("ev5") },
+    { "21164a",  TYPE("ev56") },
+    { "21164pc", TYPE("pca56") },
+    { "21264",   TYPE("ev6") },
+    { "21264a",  TYPE("ev67") },
+};
+
+static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
+{
+    ObjectClass *oc = NULL;
+    char *typename;
+    int i;
+
+    if (cpu_model == NULL) {
+        return NULL;
+    }
+
+    oc = object_class_by_name(cpu_model);
+    if (oc != NULL) {
+        return oc;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) {
+        if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) {
+            oc = object_class_by_name(alpha_cpu_aliases[i].typename);
+            assert(oc != NULL);
+            return oc;
+        }
+    }
+
+    typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model);
+    oc = object_class_by_name(typename);
+    g_free(typename);
+    return oc;
+}
+
+AlphaCPU *cpu_alpha_init(const char *cpu_model)
+{
+    AlphaCPU *cpu;
+    CPUAlphaState *env;
+    ObjectClass *cpu_class;
+
+    cpu_class = alpha_cpu_class_by_name(cpu_model);
+    if (cpu_class == NULL) {
+        /* Default to ev67; no reason not to emulate insns by default.  */
+        cpu_class = object_class_by_name(TYPE("ev67"));
+    }
+    cpu = ALPHA_CPU(object_new(object_class_get_name(cpu_class)));
+    env = &cpu->env;
+
+    env->cpu_model_str = cpu_model;
+
+    alpha_cpu_realize(OBJECT(cpu), NULL);
+    return cpu;
+}
+
+static void ev4_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->implver = IMPLVER_2106x;
+}
+
+static const TypeInfo ev4_cpu_type_info = {
+    .name = TYPE("ev4"),
+    .parent = TYPE_ALPHA_CPU,
+    .instance_init = ev4_cpu_initfn,
+};
+
+static void ev5_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->implver = IMPLVER_21164;
+}
+
+static const TypeInfo ev5_cpu_type_info = {
+    .name = TYPE("ev5"),
+    .parent = TYPE_ALPHA_CPU,
+    .instance_init = ev5_cpu_initfn,
+};
+
+static void ev56_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->amask |= AMASK_BWX;
+}
+
+static const TypeInfo ev56_cpu_type_info = {
+    .name = TYPE("ev56"),
+    .parent = TYPE("ev5"),
+    .instance_init = ev56_cpu_initfn,
+};
+
+static void pca56_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->amask |= AMASK_MVI;
+}
+
+static const TypeInfo pca56_cpu_type_info = {
+    .name = TYPE("pca56"),
+    .parent = TYPE("ev56"),
+    .instance_init = pca56_cpu_initfn,
+};
+
+static void ev6_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->implver = IMPLVER_21264;
+    env->amask = AMASK_BWX | AMASK_FIX | AMASK_MVI | AMASK_TRAP;
+}
+
+static const TypeInfo ev6_cpu_type_info = {
+    .name = TYPE("ev6"),
+    .parent = TYPE_ALPHA_CPU,
+    .instance_init = ev6_cpu_initfn,
+};
+
+static void ev67_cpu_initfn(Object *obj)
+{
+    AlphaCPU *cpu = ALPHA_CPU(obj);
+    CPUAlphaState *env = &cpu->env;
+
+    env->amask |= AMASK_CIX | AMASK_PREFETCH;
+}
+
+static const TypeInfo ev67_cpu_type_info = {
+    .name = TYPE("ev67"),
+    .parent = TYPE("ev6"),
+    .instance_init = ev67_cpu_initfn,
+};
+
+static const TypeInfo ev68_cpu_type_info = {
+    .name = TYPE("ev68"),
+    .parent = TYPE("ev67"),
+};
+
 static void alpha_cpu_initfn(Object *obj)
 {
     AlphaCPU *cpu = ALPHA_CPU(obj);
@@ -31,6 +198,8 @@ static void alpha_cpu_initfn(Object *obj)
     cpu_exec_init(env);
     tlb_flush(env, 1);
 
+    alpha_translate_init();
+
 #if defined(CONFIG_USER_ONLY)
     env->ps = PS_USER_MODE;
     cpu_alpha_store_fpcr(env, (FPCR_INVD | FPCR_DZED | FPCR_OVFD
@@ -46,13 +215,20 @@ static const TypeInfo alpha_cpu_type_info = {
     .parent = TYPE_CPU,
     .instance_size = sizeof(AlphaCPU),
     .instance_init = alpha_cpu_initfn,
-    .abstract = false,
+    .abstract = true,
     .class_size = sizeof(AlphaCPUClass),
 };
 
 static void alpha_cpu_register_types(void)
 {
     type_register_static(&alpha_cpu_type_info);
+    type_register_static(&ev4_cpu_type_info);
+    type_register_static(&ev5_cpu_type_info);
+    type_register_static(&ev56_cpu_type_info);
+    type_register_static(&pca56_cpu_type_info);
+    type_register_static(&ev6_cpu_type_info);
+    type_register_static(&ev67_cpu_type_info);
+    type_register_static(&ev68_cpu_type_info);
 }
 
 type_init(alpha_cpu_register_types)
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index e1d7715..0d08458 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -425,6 +425,8 @@ enum {
     IR_ZERO = 31,
 };
 
+void alpha_translate_init(void);
+
 AlphaCPU *cpu_alpha_init(const char *cpu_model);
 
 static inline CPUAlphaState *cpu_init(const char *cpu_model)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 6f41ef7..dc0c97c 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -90,7 +90,7 @@ static char cpu_reg_names[10*4+21*5 + 10*5+21*6];
 
 #include "gen-icount.h"
 
-static void alpha_translate_init(void)
+void alpha_translate_init(void)
 {
     int i;
     char *p;
@@ -3493,62 +3493,6 @@ void gen_intermediate_code_pc (CPUAlphaState *env, struct TranslationBlock *tb)
     gen_intermediate_code_internal(env, tb, 1);
 }
 
-struct cpu_def_t {
-    const char *name;
-    int implver, amask;
-};
-
-static const struct cpu_def_t cpu_defs[] = {
-    { "ev4",   IMPLVER_2106x, 0 },
-    { "ev5",   IMPLVER_21164, 0 },
-    { "ev56",  IMPLVER_21164, AMASK_BWX },
-    { "pca56", IMPLVER_21164, AMASK_BWX | AMASK_MVI },
-    { "ev6",   IMPLVER_21264, AMASK_BWX | AMASK_FIX | AMASK_MVI | AMASK_TRAP },
-    { "ev67",  IMPLVER_21264, (AMASK_BWX | AMASK_FIX | AMASK_CIX
-			       | AMASK_MVI | AMASK_TRAP | AMASK_PREFETCH), },
-    { "ev68",  IMPLVER_21264, (AMASK_BWX | AMASK_FIX | AMASK_CIX
-			       | AMASK_MVI | AMASK_TRAP | AMASK_PREFETCH), },
-    { "21064", IMPLVER_2106x, 0 },
-    { "21164", IMPLVER_21164, 0 },
-    { "21164a", IMPLVER_21164, AMASK_BWX },
-    { "21164pc", IMPLVER_21164, AMASK_BWX | AMASK_MVI },
-    { "21264", IMPLVER_21264, AMASK_BWX | AMASK_FIX | AMASK_MVI | AMASK_TRAP },
-    { "21264a", IMPLVER_21264, (AMASK_BWX | AMASK_FIX | AMASK_CIX
-				| AMASK_MVI | AMASK_TRAP | AMASK_PREFETCH), }
-};
-
-AlphaCPU *cpu_alpha_init(const char *cpu_model)
-{
-    AlphaCPU *cpu;
-    CPUAlphaState *env;
-    int implver, amask, i, max;
-
-    cpu = ALPHA_CPU(object_new(TYPE_ALPHA_CPU));
-    env = &cpu->env;
-
-    alpha_translate_init();
-
-    /* Default to ev67; no reason not to emulate insns by default.  */
-    implver = IMPLVER_21264;
-    amask = (AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_MVI
-	     | AMASK_TRAP | AMASK_PREFETCH);
-
-    max = ARRAY_SIZE(cpu_defs);
-    for (i = 0; i < max; i++) {
-        if (strcmp (cpu_model, cpu_defs[i].name) == 0) {
-            implver = cpu_defs[i].implver;
-            amask = cpu_defs[i].amask;
-            break;
-        }
-    }
-    env->implver = implver;
-    env->amask = amask;
-    env->cpu_model_str = cpu_model;
-
-    qemu_init_vcpu(env);
-    return cpu;
-}
-
 void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, int pc_pos)
 {
     env->pc = tcg_ctx.gen_opc_pc[pc_pos];
commit c92458538f501eda585b4b774c50644aed391a8a
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Oct 31 02:41:11 2012 +0100

    target-alpha: Avoid leaking the alarm timer over reset
    
    Move the timer from CPUAlphaState to AlphaCPU to avoid the pointer being
    zero'ed once we implement reset. Would cause a segfault in
    sys_helper.c:helper_set_alarm().
    
    This also simplifies timer initialization in Typhoon.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>

diff --git a/hw/alpha_typhoon.c b/hw/alpha_typhoon.c
index 4cc810f..40b3a47 100644
--- a/hw/alpha_typhoon.c
+++ b/hw/alpha_typhoon.c
@@ -724,8 +724,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
         AlphaCPU *cpu = cpus[i];
         s->cchip.cpu[i] = cpu;
         if (cpu != NULL) {
-            CPUAlphaState *env = &cpu->env;
-            env->alarm_timer = qemu_new_timer_ns(rtc_clock,
+            cpu->alarm_timer = qemu_new_timer_ns(rtc_clock,
                                                  typhoon_alarm_timer,
                                                  (void *)((uintptr_t)s + i));
         }
diff --git a/target-alpha/cpu-qom.h b/target-alpha/cpu-qom.h
index 6b4ca6d..98585d5 100644
--- a/target-alpha/cpu-qom.h
+++ b/target-alpha/cpu-qom.h
@@ -58,6 +58,9 @@ typedef struct AlphaCPU {
     /*< public >*/
 
     CPUAlphaState env;
+
+    /* This alarm doesn't exist in real hardware; we wish it did.  */
+    struct QEMUTimer *alarm_timer;
 } AlphaCPU;
 
 static inline AlphaCPU *alpha_env_get_cpu(CPUAlphaState *env)
diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 32e3777..e1d7715 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -277,7 +277,6 @@ struct CPUAlphaState {
 #endif
 
     /* This alarm doesn't exist in real hardware; we wish it did.  */
-    struct QEMUTimer *alarm_timer;
     uint64_t alarm_expire;
 
     /* Those resources are used only in QEMU core */
diff --git a/target-alpha/sys_helper.c b/target-alpha/sys_helper.c
index 40ca49c..d4f14ef 100644
--- a/target-alpha/sys_helper.c
+++ b/target-alpha/sys_helper.c
@@ -77,11 +77,13 @@ uint64_t helper_get_time(void)
 
 void helper_set_alarm(CPUAlphaState *env, uint64_t expire)
 {
+    AlphaCPU *cpu = alpha_env_get_cpu(env);
+
     if (expire) {
         env->alarm_expire = expire;
-        qemu_mod_timer(env->alarm_timer, expire);
+        qemu_mod_timer(cpu->alarm_timer, expire);
     } else {
-        qemu_del_timer(env->alarm_timer);
+        qemu_del_timer(cpu->alarm_timer);
     }
 }
 #endif /* CONFIG_USER_ONLY */
commit ad6011775a324d7c3e2a8bd824e03c5e576dda48
Author: Andreas Färber <afaerber at suse.de>
Date:   Tue Oct 16 02:45:53 2012 +0200

    alpha: Pass AlphaCPU array to Typhoon
    
    Also store it in TyphoonCchip.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>

diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
index 76d8ae8..af24d1e 100644
--- a/hw/alpha_dp264.c
+++ b/hw/alpha_dp264.c
@@ -50,7 +50,7 @@ static void clipper_init(QEMUMachineInitArgs *args)
     const char *kernel_filename = args->kernel_filename;
     const char *kernel_cmdline = args->kernel_cmdline;
     const char *initrd_filename = args->initrd_filename;
-    CPUAlphaState *cpus[4];
+    AlphaCPU *cpus[4];
     PCIBus *pci_bus;
     ISABus *isa_bus;
     qemu_irq rtc_irq;
@@ -62,12 +62,12 @@ static void clipper_init(QEMUMachineInitArgs *args)
     /* Create up to 4 cpus.  */
     memset(cpus, 0, sizeof(cpus));
     for (i = 0; i < smp_cpus; ++i) {
-        cpus[i] = cpu_init(cpu_model ? cpu_model : "ev67");
+        cpus[i] = cpu_alpha_init(cpu_model ? cpu_model : "ev67");
     }
 
-    cpus[0]->trap_arg0 = ram_size;
-    cpus[0]->trap_arg1 = 0;
-    cpus[0]->trap_arg2 = smp_cpus;
+    cpus[0]->env.trap_arg0 = ram_size;
+    cpus[0]->env.trap_arg1 = 0;
+    cpus[0]->env.trap_arg2 = smp_cpus;
 
     /* Init the chipset.  */
     pci_bus = typhoon_init(ram_size, &isa_bus, &rtc_irq, cpus,
@@ -119,9 +119,9 @@ static void clipper_init(QEMUMachineInitArgs *args)
 
     /* Start all cpus at the PALcode RESET entry point.  */
     for (i = 0; i < smp_cpus; ++i) {
-        cpus[i]->pal_mode = 1;
-        cpus[i]->pc = palcode_entry;
-        cpus[i]->palbr = palcode_entry;
+        cpus[i]->env.pal_mode = 1;
+        cpus[i]->env.pc = palcode_entry;
+        cpus[i]->env.palbr = palcode_entry;
     }
 
     /* Load a kernel.  */
@@ -136,7 +136,7 @@ static void clipper_init(QEMUMachineInitArgs *args)
             exit(1);
         }
 
-        cpus[0]->trap_arg1 = kernel_entry;
+        cpus[0]->env.trap_arg1 = kernel_entry;
 
         param_offset = kernel_low - 0x6000;
 
diff --git a/hw/alpha_sys.h b/hw/alpha_sys.h
index 7604d09..69929ea 100644
--- a/hw/alpha_sys.h
+++ b/hw/alpha_sys.h
@@ -11,7 +11,7 @@
 #include "irq.h"
 
 
-PCIBus *typhoon_init(ram_addr_t, ISABus **, qemu_irq *, CPUAlphaState *[4],
+PCIBus *typhoon_init(ram_addr_t, ISABus **, qemu_irq *, AlphaCPU *[4],
                      pci_map_irq_fn);
 
 /* alpha_pci.c.  */
diff --git a/hw/alpha_typhoon.c b/hw/alpha_typhoon.c
index 9b16d96..4cc810f 100644
--- a/hw/alpha_typhoon.c
+++ b/hw/alpha_typhoon.c
@@ -23,7 +23,7 @@ typedef struct TyphoonCchip {
     uint64_t drir;
     uint64_t dim[4];
     uint32_t iic[4];
-    CPUAlphaState *cpu[4];
+    AlphaCPU *cpu[4];
 } TyphoonCchip;
 
 typedef struct TyphoonWindow {
@@ -58,10 +58,11 @@ typedef struct TyphoonState {
 } TyphoonState;
 
 /* Called when one of DRIR or DIM changes.  */
-static void cpu_irq_change(CPUAlphaState *env, uint64_t req)
+static void cpu_irq_change(AlphaCPU *cpu, uint64_t req)
 {
     /* If there are any non-masked interrupts, tell the cpu.  */
-    if (env) {
+    if (cpu != NULL) {
+        CPUAlphaState *env = &cpu->env;
         if (req) {
             cpu_interrupt(env, CPU_INTERRUPT_HARD);
         } else {
@@ -353,8 +354,9 @@ static void cchip_write(void *opaque, hwaddr addr,
         if ((newval ^ oldval) & 0xff0) {
             int i;
             for (i = 0; i < 4; ++i) {
-                CPUAlphaState *env = s->cchip.cpu[i];
-                if (env) {
+                AlphaCPU *cpu = s->cchip.cpu[i];
+                if (cpu != NULL) {
+                    CPUAlphaState *env = &cpu->env;
                     /* IPI can be either cleared or set by the write.  */
                     if (newval & (1 << (i + 8))) {
                         cpu_interrupt(env, CPU_INTERRUPT_SMP);
@@ -661,8 +663,8 @@ static void typhoon_set_timer_irq(void *opaque, int irq, int level)
 
     /* Deliver the interrupt to each CPU, considering each CPU's IIC.  */
     for (i = 0; i < 4; ++i) {
-        CPUAlphaState *env = s->cchip.cpu[i];
-        if (env) {
+        AlphaCPU *cpu = s->cchip.cpu[i];
+        if (cpu != NULL) {
             uint32_t iic = s->cchip.iic[i];
 
             /* ??? The verbage in Section 10.2.2.10 isn't 100% clear.
@@ -681,7 +683,7 @@ static void typhoon_set_timer_irq(void *opaque, int irq, int level)
                 /* Set the ITI bit for this cpu.  */
                 s->cchip.misc |= 1 << (i + 4);
                 /* And signal the interrupt.  */
-                cpu_interrupt(env, CPU_INTERRUPT_TIMER);
+                cpu_interrupt(&cpu->env, CPU_INTERRUPT_TIMER);
             }
         }
     }
@@ -694,12 +696,12 @@ static void typhoon_alarm_timer(void *opaque)
 
     /* Set the ITI bit for this cpu.  */
     s->cchip.misc |= 1 << (cpu + 4);
-    cpu_interrupt(s->cchip.cpu[cpu], CPU_INTERRUPT_TIMER);
+    cpu_interrupt(&s->cchip.cpu[cpu]->env, CPU_INTERRUPT_TIMER);
 }
 
 PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
                      qemu_irq *p_rtc_irq,
-                     CPUAlphaState *cpus[4], pci_map_irq_fn sys_map_irq)
+                     AlphaCPU *cpus[4], pci_map_irq_fn sys_map_irq)
 {
     const uint64_t MB = 1024 * 1024;
     const uint64_t GB = 1024 * MB;
@@ -719,9 +721,10 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
 
     /* Remember the CPUs so that we can deliver interrupts to them.  */
     for (i = 0; i < 4; i++) {
-        CPUAlphaState *env = cpus[i];
-        s->cchip.cpu[i] = env;
-        if (env) {
+        AlphaCPU *cpu = cpus[i];
+        s->cchip.cpu[i] = cpu;
+        if (cpu != NULL) {
+            CPUAlphaState *env = &cpu->env;
             env->alarm_timer = qemu_new_timer_ns(rtc_clock,
                                                  typhoon_alarm_timer,
                                                  (void *)((uintptr_t)s + i));
commit 5f5e335088925cedc5b912fd8bb4e1e933094d56
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Oct 15 17:52:00 2012 +0200

    target-alpha: Let cpu_alpha_init() return AlphaCPU
    
    Replace cpu_init() macro with inline function for backwards
    compatibility.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index 9939d61..32e3777 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -290,7 +290,6 @@ struct CPUAlphaState {
     int implver;
 };
 
-#define cpu_init cpu_alpha_init
 #define cpu_exec cpu_alpha_exec
 #define cpu_gen_code cpu_alpha_gen_code
 #define cpu_signal_handler cpu_alpha_signal_handler
@@ -427,7 +426,17 @@ enum {
     IR_ZERO = 31,
 };
 
-CPUAlphaState * cpu_alpha_init (const char *cpu_model);
+AlphaCPU *cpu_alpha_init(const char *cpu_model);
+
+static inline CPUAlphaState *cpu_init(const char *cpu_model)
+{
+    AlphaCPU *cpu = cpu_alpha_init(cpu_model);
+    if (cpu == NULL) {
+        return NULL;
+    }
+    return &cpu->env;
+}
+
 int cpu_alpha_exec(CPUAlphaState *s);
 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 71fe1a1..6f41ef7 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -3517,7 +3517,7 @@ static const struct cpu_def_t cpu_defs[] = {
 				| AMASK_MVI | AMASK_TRAP | AMASK_PREFETCH), }
 };
 
-CPUAlphaState * cpu_alpha_init (const char *cpu_model)
+AlphaCPU *cpu_alpha_init(const char *cpu_model)
 {
     AlphaCPU *cpu;
     CPUAlphaState *env;
@@ -3546,7 +3546,7 @@ CPUAlphaState * cpu_alpha_init (const char *cpu_model)
     env->cpu_model_str = cpu_model;
 
     qemu_init_vcpu(env);
-    return env;
+    return cpu;
 }
 
 void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb, int pc_pos)
commit f28558d3d37ad3bc4e35e8ac93f7bf81a0d5622c
Author: Will Auld <will.auld.intel at gmail.com>
Date:   Mon Nov 26 21:32:18 2012 -0800

    target-i386: Enabling IA32_TSC_ADJUST for QEMU KVM guest VMs
    
    CPUID.7.0.EBX[1]=1 indicates IA32_TSC_ADJUST MSR 0x3b is supported
    
    Basic design is to emulate the MSR by allowing reads and writes to the
    hypervisor vcpu specific locations to store the value of the emulated MSRs.
    In this way the IA32_TSC_ADJUST value will be included in all reads to
    the TSC MSR whether through rdmsr or rdtsc.
    
    As this is a new MSR that the guest may access and modify, its value needs
    to be migrated along with the other MSRs. The changes here are specifically
    for recognizing when IA32_TSC_ADJUST is enabled in CPUID and code added
    for migrating its value.
    
    Signed-off-by: Will Auld <will.auld at intel.com>
    Reviewed-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 386c4f6..477da33 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -295,6 +295,7 @@
 #define MSR_IA32_APICBASE_BSP           (1<<8)
 #define MSR_IA32_APICBASE_ENABLE        (1<<11)
 #define MSR_IA32_APICBASE_BASE          (0xfffff<<12)
+#define MSR_TSC_ADJUST                  0x0000003b
 #define MSR_IA32_TSCDEADLINE            0x6e0
 
 #define MSR_MTRRcap			0xfe
@@ -774,6 +775,7 @@ typedef struct CPUX86State {
     uint64_t pv_eoi_en_msr;
 
     uint64_t tsc;
+    uint64_t tsc_adjust;
     uint64_t tsc_deadline;
 
     uint64_t mcg_status;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f669281..ae6ce1f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -62,6 +62,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 
 static bool has_msr_star;
 static bool has_msr_hsave_pa;
+static bool has_msr_tsc_adjust;
 static bool has_msr_tsc_deadline;
 static bool has_msr_async_pf_en;
 static bool has_msr_pv_eoi_en;
@@ -676,6 +677,10 @@ static int kvm_get_supported_msrs(KVMState *s)
                     has_msr_hsave_pa = true;
                     continue;
                 }
+                if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) {
+                    has_msr_tsc_adjust = true;
+                    continue;
+                }
                 if (kvm_msr_list->indices[i] == MSR_IA32_TSCDEADLINE) {
                     has_msr_tsc_deadline = true;
                     continue;
@@ -1013,6 +1018,9 @@ static int kvm_put_msrs(CPUX86State *env, int level)
     if (has_msr_hsave_pa) {
         kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
     }
+    if (has_msr_tsc_adjust) {
+        kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust);
+    }
     if (has_msr_tsc_deadline) {
         kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSCDEADLINE, env->tsc_deadline);
     }
@@ -1273,6 +1281,9 @@ static int kvm_get_msrs(CPUX86State *env)
     if (has_msr_hsave_pa) {
         msrs[n++].index = MSR_VM_HSAVE_PA;
     }
+    if (has_msr_tsc_adjust) {
+        msrs[n++].index = MSR_TSC_ADJUST;
+    }
     if (has_msr_tsc_deadline) {
         msrs[n++].index = MSR_IA32_TSCDEADLINE;
     }
@@ -1350,6 +1361,9 @@ static int kvm_get_msrs(CPUX86State *env)
         case MSR_IA32_TSC:
             env->tsc = msrs[i].data;
             break;
+        case MSR_TSC_ADJUST:
+            env->tsc_adjust = msrs[i].data;
+            break;
         case MSR_IA32_TSCDEADLINE:
             env->tsc_deadline = msrs[i].data;
             break;
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 4771508..4229dde 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -328,6 +328,24 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     }
 };
 
+static bool tsc_adjust_needed(void *opaque)
+{
+    CPUX86State *env = opaque;
+
+    return env->tsc_adjust != 0;
+}
+
+static const VMStateDescription vmstate_msr_tsc_adjust = {
+    .name = "cpu/msr_tsc_adjust",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT64(tsc_adjust, CPUX86State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool tscdeadline_needed(void *opaque)
 {
     CPUX86State *env = opaque;
@@ -478,6 +496,9 @@ static const VMStateDescription vmstate_cpu = {
             .vmsd = &vmstate_fpop_ip_dp,
             .needed = fpop_ip_dp_needed,
         }, {
+            .vmsd = &vmstate_msr_tsc_adjust,
+            .needed = tsc_adjust_needed,
+        }, {
             .vmsd = &vmstate_msr_tscdeadline,
             .needed = tscdeadline_needed,
         }, {