Thu Jan 31 06:39:53 PST 2013

MAINTAINERS                        |    2 
 block-migration.c                  |    7 
 block.c                            |  124 ---
 block/bochs.c                      |    2 
 block/cow.c                        |    2 
 block/mirror.c                     |  382 +++++++++--
 block/qcow.c                       |    2 
 block/qcow2-refcount.c             |    6 
 block/qcow2.c                      |    2 
 block/qed.c                        |    2 
 block/vdi.c                        |   29 
 block/vmdk.c                       |    4 
 blockdev.c                         |   66 +
 cpus.c                             |    8 
 docs/virtio-balloon-stats.txt      |  104 +++
 fsdev/virtfs-proxy-helper.c        |    4 
 hmp-commands.hx                    |   39 +
 hmp.c                              |   60 +
 hmp.h                              |    2 
 hw/9pfs/virtio-9p-device.c         |    8 
 hw/9pfs/virtio-9p-local.c          |    8 
 hw/9pfs/virtio-9p.c                |    6 
 hw/arm_sysctl.c                    |    1 
 hw/block-common.c                  |    4 
 hw/boards.h                        |    1 
 hw/cadence_gem.c                   |   17 
 hw/cadence_ttc.c                   |    4 
 hw/fw_cfg.c                        |    1 
 hw/ide/ahci.c                      |   98 ++
 hw/ide/ahci.h                      |   20 
 hw/ide/core.c                      |   33 
 hw/ide/ich.c                       |   14 
 hw/pc.c                            |   40 +
 hw/pc_piix.c                       |   26 
 hw/ppc/Makefile.objs               |    3 
 hw/ppc/mac_newworld.c              |    1 
 hw/ppc/mac_oldworld.c              |    1 
 hw/ppc/prep.c                      |  680 +++++++++++++++++++
 hw/ppc_prep.c                      |  680 -------------------
 hw/prep_pci.c                      |   56 +
 hw/s390-virtio-bus.c               |  615 -----------------
 hw/s390-virtio-bus.h               |  124 ---
 hw/s390-virtio.c                   |  274 -------
 hw/s390-virtio.h                   |   22 
 hw/s390x/Makefile.objs             |    5 
 hw/s390x/css.c                     | 1277 +++++++++++++++++++++++++++++++++++++
 hw/s390x/css.h                     |   99 ++
 hw/s390x/ipl.c                     |    2 
 hw/s390x/s390-virtio-bus.c         |  623 ++++++++++++++++++
 hw/s390x/s390-virtio-bus.h         |  124 +++
 hw/s390x/s390-virtio-ccw.c         |  134 +++
 hw/s390x/s390-virtio-hcall.c       |    2 
 hw/s390x/s390-virtio.c             |  293 ++++++++
 hw/s390x/s390-virtio.h             |   28 
 hw/s390x/virtio-ccw.c              |  960 +++++++++++++++++++++++++++
 hw/s390x/virtio-ccw.h              |   98 ++
 hw/sun4m.c                         |    3 
 hw/sun4u.c                         |    1 
 hw/vexpress.c                      |    2 
 hw/vhost.c                         |    7 
 hw/virtio-balloon.c                |  175 ++++-
 hw/xilinx_axidma.c                 |    2 
 include/block/block.h              |   11 
 include/block/block_int.h          |   10 
 include/qemu-common.h              |    3 
 include/qemu/hbitmap.h             |  208 ++++++
 include/qemu/host-utils.h          |   26 
 include/qom/cpu.h                  |   17 
 include/qom/object.h               |    8 
 include/sysemu/cpus.h              |    7 
 include/sysemu/kvm.h               |    6 
 include/ui/qemu-pixman.h           |    2 
 include/ui/spice-display.h         |    1 
 kvm-all.c                          |    5 
 kvm-stub.c                         |    2 
 libcacard/vcard_emul_nss.c         |    4 
 libcacard/vreader.c                |    2 
 libcacard/vscclient.c              |    4 
 net/net.c                          |    4 
 qapi-schema.json                   |  109 ++-
 qapi/qmp-registry.c                |    2 
 qemu-char.c                        |  203 +++++
 qemu-log.c                         |   15 
 qemu-options.hx                    |   10 
 qga/commands-posix.c               |   20 
 qga/commands.c                     |    2 
 qmp-commands.hx                    |   91 ++
 qom/cpu.c                          |   13 
 qom/object.c                       |    5 
 readline.c                         |    4 
 scripts/kvm/vmxcap                 |    1 
 slirp/slirp.c                      |    8 
 target-alpha/cpu.c                 |   16 
 target-arm/cpu.c                   |   18 
 target-arm/helper.c                |    6 
 target-i386/cpu.c                  |  389 ++++-------
 target-i386/cpu.h                  |   12 
 target-i386/kvm.c                  |   32 
 target-i386/topology.h             |  136 +++
 target-m68k/cpu.c                  |   20 
 target-m68k/helper.c               |    6 
 target-openrisc/cpu.c              |   36 -
 target-openrisc/exception_helper.c |    2 
 target-openrisc/fpu_helper.c       |   32 
 target-openrisc/int_helper.c       |    2 
 target-openrisc/interrupt_helper.c |    2 
 target-openrisc/mmu.c              |    6 
 target-openrisc/sys_helper.c       |    4 
 target-ppc/kvm.c                   |    5 
 target-ppc/translate_init.c        |    2 
 target-s390x/Makefile.objs         |    2 
 target-s390x/cpu.h                 |  247 +++++++
 target-s390x/helper.c              |  200 +++++
 target-s390x/ioinst.c              |  761 ++++++++++++++++++++++
 target-s390x/ioinst.h              |  230 ++++++
 target-s390x/kvm.c                 |  244 ++++++-
 target-unicore32/cpu.c             |   26 
 target-unicore32/helper.c          |    6 
 tests/.gitignore                   |    1 
 tests/Makefile                     |   12 
 tests/m48t59-test.c                |    2 
 tests/qemu-iotests/041             |   81 ++
 tests/qemu-iotests/041.out         |    4 
 tests/rtc-test.c                   |    2 
 tests/test-hbitmap.c               |  401 +++++++++++
 tests/test-x86-cpuid.c             |  110 +++
 trace-events                       |   30 
 ui/qemu-pixman.c                   |    3 
 ui/spice-core.c                    |    4 
 ui/vnc-ws.c                        |   11 
 util/Makefile.objs                 |    2 
 util/hbitmap.c                     |  401 +++++++++++
 util/qemu-option.c                 |    8 
 vl.c                               |   52 +
 134 files changed, 9323 insertions(+), 2465 deletions(-)

New commits:
commit 321f211707822b4c87f0bb89e4f46586fff43163
Merge: b0df98f f61850b
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 30 10:51:47 2013 -0600

    Merge remote-tracking branch 'pmaydell/arm-devs.next' into staging
    
    # By Christoffer Dall (1) and Peter Maydell (1)
    # Via Peter Maydell
    * pmaydell/arm-devs.next:
      hw/vexpress: Use correct HBI (board model number) for vexpress-a15
      hw/arm_sysctl: Clear sysctl cfgctrl start bit

commit b0df98f3a2968eb5aba3075d1cf9b1a3931907e0
Merge: 7cc2a8b 99f4280
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 30 10:51:18 2013 -0600

    Merge remote-tracking branch 'stefanha/trivial-patches' into staging
    
    # By Markus Armbruster (12) and others
    # Via Stefan Hajnoczi
    * stefanha/trivial-patches:
      qmp-commands.hx: s/tray-open/tray_open/ to match qapi schema
      tests: Fix {rtc, m48t59}-test build on illumos
      qemu-pixman.h: Avoid mutual inclusion loop with console.h
      qemu-ga: Fix unchecked strdup() by converting to g_strdup()
      qapi: Fix unchecked strdup() by converting to g_strdup()
      libcacard: Fix unchecked strdup() by converting to g_strdup()
      qemu-log: Plug trivial memory leak in cpu_set_log_filename()
      qemu-log: Fix unchecked strdup() by converting to g_strdup()
      virtfs-proxy-helper: Fix unchecked strdup() by conv. to g_strdup()
      spice: Fix unchecked strdup() by converting to g_strdup()
      readline: Fix unchecked strdup() by converting to g_strdup()
      hw/9pfs: Fix unchecked strdup() by converting to g_strdup()
      g_strdup(NULL) returns NULL; simplify
      g_malloc(0) and g_malloc0(0) return NULL; simplify
      xilinx_axidma: Fix debug mode compile messages
      cadence_gem: Debug mode compile fixes
      cadence_ttc: Debug mode compile fixes
      vnc: Clean up vncws_send_handshake_response()

commit 7cc2a8b14a363777e7d5b7d102176fba0cf27667
Merge: 6ac5107 7561015
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 30 10:48:23 2013 -0600

    Merge remote-tracking branch 'afaerber-or/prep-up' into staging
    
    # By Andreas Färber
    # Via Andreas Färber
    * afaerber-or/prep-up:
      prep: Move PReP machine to hw/ppc/
      prep_pci: Convert to QOM realizefn
      prep_pci: Create PCIBus and PCIDevice in-place

commit 6ac5107dc7045f2edc43ada2cb3f18d372b9be0b
Merge: 0893d46 19380b1
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Jan 30 10:46:30 2013 -0600

    Merge remote-tracking branch 'agraf/s390-for-upstream' into staging
    
    # By Cornelia Huck (13) and others
    # Via Alexander Graf
    * agraf/s390-for-upstream:
      s390: Drop set_bit usage in virtio_ccw.
      s390: css error codes.
      s390: Use s390_cpu_physical_memory_map for tpi.
      sclpconsole: Don't instantiate sclpconsole with -nodefaults
      s390: Add s390-ccw-virtio machine.
      s390-virtio: Check for NULL device in reset hypercall
      s390: Move hw files to hw/s390x
      virtio-s390: add a reset function to virtio-s390 devices
      s390: Make typeinfo const
      s390: Add new channel I/O based virtio transport.
      s390-virtio: Factor out some initialization code.
      s390: Wire up channel I/O in kvm.
      s390: Virtual channel subsystem support.
      s390: Add channel I/O instructions.
      s390: I/O interrupt and machine check injection.
      s390: Channel I/O basic definitions.
      s390: Add mapping helper functions.
      s390: Lowcore mapping helper.
      s390: Add default support for SCLP console

commit f61850bffbd0bb8b15aa55ebc3470bf1f8ce8664
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Wed Jan 30 15:39:02 2013 +0000

    hw/vexpress: Use correct HBI (board model number) for vexpress-a15
    
    The vexpress-a15 QEMU model is supposed to be a V2P-CA15; the HBI
    (a kind of board model number) for this coretile is 237, not 217.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/vexpress.c b/hw/vexpress.c
index 7f0897c..741b044 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -271,7 +271,7 @@ static void a15_daughterboard_init(const VEDBoardInfo *daughterboard,
         cpu_model = "cortex-a15";
     }
 
-    *proc_id = 0x14000217;
+    *proc_id = 0x14000237;
 
     for (n = 0; n < smp_cpus; n++) {
         ARMCPU *cpu;
commit 706872a56630a206897742b70c69ff99727672d3
Author: Christoffer Dall <c.dall at virtualopensystems.com>
Date:   Wed Jan 30 15:39:01 2013 +0000

    hw/arm_sysctl: Clear sysctl cfgctrl start bit
    
    The start bit should only be set to indicate that a function call is
    underway, right now.  When done with function, clear it.
    
    Signed-off-by: Christoffer Dall <c.dall at virtualopensystems.com>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/hw/arm_sysctl.c b/hw/arm_sysctl.c
index da36f8a..7ecb7da 100644
--- a/hw/arm_sysctl.c
+++ b/hw/arm_sysctl.c
@@ -334,6 +334,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset,
         default:
             s->sys_cfgstat |= 2;        /* error */
         }
+        s->sys_cfgctrl &= ~(1 << 31);
         return;
     case 0xa8: /* SYS_CFGSTAT */
         if (board_id(s) != BOARD_ID_VEXPRESS) {
commit 99f4280854514b22972bd257fe5facc439222d2e
Author: Michal Privoznik <mprivozn at redhat.com>
Date:   Tue Jan 29 17:58:41 2013 +0100

    qmp-commands.hx: s/tray-open/tray_open/ to match qapi schema
    
    Currently, we are using 'tray_open' in QMP and 'tray-open' in
    HMP. However, the QMP documentation was mistakenly using the
    HMP version.
    
    Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qmp-commands.hx b/qmp-commands.hx
index f58a841..f90efe5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1659,7 +1659,7 @@ Each json-object contain the following:
          - Possible values: "unknown"
 - "removable": true if the device is removable, false otherwise (json-bool)
 - "locked": true if the device is locked, false otherwise (json-bool)
-- "tray-open": only present if removable, true if the device has a tray,
+- "tray_open": only present if removable, true if the device has a tray,
                and it is open (json-bool)
 - "inserted": only present if the device is inserted, it is a json-object
    containing the following:
commit a05ddd9216b6c5e9c48eac3433ff6fa4a282fc17
Author: Andreas Färber <andreas.faerber at web.de>
Date:   Sat Jan 26 15:27:59 2013 +0100

    tests: Fix {rtc, m48t59}-test build on illumos
    
    Struct tm does not have tm_gmtoff field on illumos.
    Fix the build by not zero-initializing these fields on Solaris.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <andreas.faerber at web.de>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/m48t59-test.c b/tests/m48t59-test.c
index d79f554..77d69b3 100644
--- a/tests/m48t59-test.c
+++ b/tests/m48t59-test.c
@@ -142,7 +142,9 @@ static void cmos_get_date_time(struct tm *date)
     date->tm_mday = mday;
     date->tm_mon = mon - 1;
     date->tm_year = base_year + year - 1900;
+#ifndef __sun__
     date->tm_gmtoff = 0;
+#endif
 
     ts = mktime(date);
 }
diff --git a/tests/rtc-test.c b/tests/rtc-test.c
index e7123ca..203c0fc 100644
--- a/tests/rtc-test.c
+++ b/tests/rtc-test.c
@@ -115,7 +115,9 @@ static void cmos_get_date_time(struct tm *date)
     date->tm_mday = mday;
     date->tm_mon = mon - 1;
     date->tm_year = base_year + year - 1900;
+#ifndef __sun__
     date->tm_gmtoff = 0;
+#endif
 
     ts = mktime(date);
 }
commit 0b0878611c64f027ca5d15ba641f423cc04ead3f
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Mon Jan 21 12:03:47 2013 +0000

    qemu-pixman.h: Avoid mutual inclusion loop with console.h
    
    Remove an unnecessary mutual inclusion loop between qemu-pixman.h and
    console.h, since the former was only including the latter for
    'PixelFormat*', which can be provided by typedefs.h.  This requires a
    minor adjustment to the files which included qemu-pixman.h, since
    they were relying on it implicitly dragging in all of console.h.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Acked-by: Gerd Hoffmann <kraxel at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h
index 016fd87..b032f52 100644
--- a/include/ui/qemu-pixman.h
+++ b/include/ui/qemu-pixman.h
@@ -15,7 +15,7 @@
 #pragma GCC diagnostic error "-Wredundant-decls"
 #endif
 
-#include "console.h"
+#include "qemu/typedefs.h"
 
 /*
  * pixman image formats are defined to be native endian,
diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 8b192e9..46f9530 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -21,6 +21,7 @@
 
 #include "qemu/thread.h"
 #include "ui/qemu-pixman.h"
+#include "ui/console.h"
 #include "sysemu/sysemu.h"
 
 #define NUM_MEMSLOTS 8
diff --git a/ui/qemu-pixman.c b/ui/qemu-pixman.c
index 609335a..6dcbe90 100644
--- a/ui/qemu-pixman.c
+++ b/ui/qemu-pixman.c
@@ -3,7 +3,8 @@
  * See the COPYING file in the top-level directory.
  */
 
-#include "ui/qemu-pixman.h"
+#include "qemu-common.h"
+#include "ui/console.h"
 
 int qemu_pixman_get_type(int rshift, int gshift, int bshift)
 {
commit 24a53049537886cf93273c01e3d7727444321afc
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:06 2013 +0100

    qemu-ga: Fix unchecked strdup() by converting to g_strdup()
    
    I figure it's freed somewhere deep down in QAPI, with g_free().
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qga/commands.c b/qga/commands.c
index 7ffb35e..528b082 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -61,7 +61,7 @@ struct GuestAgentInfo *qmp_guest_info(Error **err)
 
     while (*cmd_list) {
         cmd_info = g_malloc0(sizeof(GuestAgentCommandInfo));
-        cmd_info->name = strdup(*cmd_list);
+        cmd_info->name = g_strdup(*cmd_list);
         cmd_info->enabled = qmp_command_is_enabled(cmd_info->name);
 
         cmd_info_list = g_malloc0(sizeof(GuestAgentCommandInfoList));
commit 13b10e05e4b89418a78500da05b25164f786d7e4
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:05 2013 +0100

    qapi: Fix unchecked strdup() by converting to g_strdup()
    
    Note that we already free with g_free().
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qapi/qmp-registry.c b/qapi/qmp-registry.c
index 70cdbca..28bbbe8 100644
--- a/qapi/qmp-registry.c
+++ b/qapi/qmp-registry.c
@@ -92,7 +92,7 @@ char **qmp_get_command_list(void)
     list_head = list = g_malloc0(count * sizeof(char *));
 
     QTAILQ_FOREACH(cmd, &qmp_commands, node) {
-        *list = strdup(cmd->name);
+        *list = g_strdup(cmd->name);
         list++;
     }
 
commit be168af860109a1ecc8526c5bbe0ace1536448b8
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:04 2013 +0100

    libcacard: Fix unchecked strdup() by converting to g_strdup()
    
    Note that we already free with g_free().
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/libcacard/vcard_emul_nss.c b/libcacard/vcard_emul_nss.c
index 5f565e0..df79476 100644
--- a/libcacard/vcard_emul_nss.c
+++ b/libcacard/vcard_emul_nss.c
@@ -454,7 +454,7 @@ vreader_emul_new(PK11SlotInfo *slot, VCardEmulType type, const char *params)
 
     new_reader_emul->slot = PK11_ReferenceSlot(slot);
     new_reader_emul->default_type = type;
-    new_reader_emul->type_params = strdup(params);
+    new_reader_emul->type_params = g_strdup(params);
     new_reader_emul->present = PR_FALSE;
     new_reader_emul->series = 0;
     new_reader_emul->saved_vcard = NULL;
@@ -997,7 +997,7 @@ vcard_emul_init(const VCardEmulOptions *options)
     /* We should control this with options. For now we mirror out any
      * removable hardware slot */
     default_card_type = options->hw_card_type;
-    default_type_params = strdup(options->hw_type_params);
+    default_type_params = g_strdup(options->hw_type_params);
 
     SECMOD_GetReadLock(module_lock);
     for (mlp = module_list; mlp; mlp = mlp->next) {
diff --git a/libcacard/vreader.c b/libcacard/vreader.c
index 313349b..f3efc27 100644
--- a/libcacard/vreader.c
+++ b/libcacard/vreader.c
@@ -49,7 +49,7 @@ vreader_new(const char *name, VReaderEmul *private,
     reader = (VReader *)g_malloc(sizeof(VReader));
     qemu_mutex_init(&reader->lock);
     reader->reference_count = 1;
-    reader->name = name ? strdup(name) : NULL;
+    reader->name = g_strdup(name);
     reader->card = NULL;
     reader->id = (vreader_id_t)-1;
     reader->reader_private = private;
diff --git a/libcacard/vscclient.c b/libcacard/vscclient.c
index 2fce52b..9b744f2 100644
--- a/libcacard/vscclient.c
+++ b/libcacard/vscclient.c
@@ -503,8 +503,8 @@ main(
         command_line_options = vcard_emul_options(emul_args);
     }
 
-    qemu_host = strdup(argv[argc - 2]);
-    qemu_port = strdup(argv[argc - 1]);
+    qemu_host = g_strdup(argv[argc - 2]);
+    qemu_port = g_strdup(argv[argc - 1]);
     sock = connect_to_qemu(qemu_host, qemu_port);
     if (sock == -1) {
         fprintf(stderr, "error opening socket, exiting.\n");
commit 40a50b0a73d185c85cf62023f07e3091861081bb
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:03 2013 +0100

    qemu-log: Plug trivial memory leak in cpu_set_log_filename()
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-log.c b/qemu-log.c
index 64a1b88..30c9ab0 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -21,10 +21,12 @@
 #include "qemu/log.h"
 
 #ifdef WIN32
-static const char *logfilename = "qemu.log";
+#define DEFAULT_LOGFILENAME "qemu.log"
 #else
-static const char *logfilename = "/tmp/qemu.log";
+#define DEFAULT_LOGFILENAME "/tmp/qemu.log"
 #endif
+
+static char *logfilename;
 FILE *qemu_logfile;
 int qemu_loglevel;
 static int log_append = 0;
@@ -54,11 +56,13 @@ void qemu_log_mask(int mask, const char *fmt, ...)
 /* enable or disable low levels log */
 void qemu_set_log(int log_flags, bool use_own_buffers)
 {
+    const char *fname = logfilename ?: DEFAULT_LOGFILENAME;
+
     qemu_loglevel = log_flags;
     if (qemu_loglevel && !qemu_logfile) {
-        qemu_logfile = fopen(logfilename, log_append ? "a" : "w");
+        qemu_logfile = fopen(fname, log_append ? "a" : "w");
         if (!qemu_logfile) {
-            perror(logfilename);
+            perror(fname);
             _exit(1);
         }
         /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
@@ -84,6 +88,7 @@ void qemu_set_log(int log_flags, bool use_own_buffers)
 
 void cpu_set_log_filename(const char *filename)
 {
+    g_free(logfilename);
     logfilename = g_strdup(filename);
     if (qemu_logfile) {
         fclose(qemu_logfile);
commit 636e0f27c6675839dc43d4c7613dfefc222234eb
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:02 2013 +0100

    qemu-log: Fix unchecked strdup() by converting to g_strdup()
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-log.c b/qemu-log.c
index b655b30..64a1b88 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -84,7 +84,7 @@ void qemu_set_log(int log_flags, bool use_own_buffers)
 
 void cpu_set_log_filename(const char *filename)
 {
-    logfilename = strdup(filename);
+    logfilename = g_strdup(filename);
     if (qemu_logfile) {
         fclose(qemu_logfile);
         qemu_logfile = NULL;
commit 606017de2f254f847a009af770f8babc9a9fcadc
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:01 2013 +0100

    virtfs-proxy-helper: Fix unchecked strdup() by conv. to g_strdup()
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c
index 6b9afd3..36f6616 100644
--- a/fsdev/virtfs-proxy-helper.c
+++ b/fsdev/virtfs-proxy-helper.c
@@ -1039,7 +1039,7 @@ int main(int argc, char **argv)
         }
         switch (c) {
         case 'p':
-            rpath = strdup(optarg);
+            rpath = g_strdup(optarg);
             break;
         case 'n':
             is_daemon = false;
@@ -1048,7 +1048,7 @@ int main(int argc, char **argv)
             sock = atoi(optarg);
             break;
         case 's':
-            sock_name = strdup(optarg);
+            sock_name = g_strdup(optarg);
             break;
         case 'u':
             own_u = atoi(optarg);
commit fd3bea3f44500bbd361e617a4316df073b112eec
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:08:00 2013 +0100

    spice: Fix unchecked strdup() by converting to g_strdup()
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/ui/spice-core.c b/ui/spice-core.c
index 3f2c565..bcc4199 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -848,8 +848,8 @@ static int qemu_spice_set_ticket(bool fail_if_conn, bool disconnect_if_conn)
 int qemu_spice_set_passwd(const char *passwd,
                           bool fail_if_conn, bool disconnect_if_conn)
 {
-    free(auth_passwd);
-    auth_passwd = strdup(passwd);
+    g_free(auth_passwd);
+    auth_passwd = g_strdup(passwd);
     return qemu_spice_set_ticket(fail_if_conn, disconnect_if_conn);
 }
 
commit c3baa5f9e4f84bbc9ed9c3fe590835ecc8b0bf4a
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:07:59 2013 +0100

    readline: Fix unchecked strdup() by converting to g_strdup()
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/readline.c b/readline.c
index a0c9638..d6e04d4 100644
--- a/readline.c
+++ b/readline.c
@@ -247,14 +247,14 @@ static void readline_hist_add(ReadLineState *rs, const char *cmdline)
     }
     if (idx == READLINE_MAX_CMDS) {
 	/* Need to get one free slot */
-	free(rs->history[0]);
+        g_free(rs->history[0]);
 	memmove(rs->history, &rs->history[1],
 	        (READLINE_MAX_CMDS - 1) * sizeof(char *));
 	rs->history[READLINE_MAX_CMDS - 1] = NULL;
 	idx = READLINE_MAX_CMDS - 1;
     }
     if (new_entry == NULL)
-	new_entry = strdup(cmdline);
+        new_entry = g_strdup(cmdline);
     rs->history[idx] = new_entry;
     rs->hist_entry = -1;
 }
commit d3f8e138c23ba082f87c96634d06b978473c1e9b
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:07:58 2013 +0100

    hw/9pfs: Fix unchecked strdup() by converting to g_strdup()
    
    Note: the allocation in virtio_9p_init() is still leaked.  To be fixed
    in a followup commit.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 6eab7f7..74155fb 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -94,7 +94,7 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf)
         exit(1);
     }
 
-    s->tag = strdup(conf->tag);
+    s->tag = g_strdup(conf->tag);
     s->ctx.uid = -1;
 
     s->ops = fse->ops;
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 1136021..f1b1c83 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -46,7 +46,7 @@ static const char *local_mapped_attr_path(FsContext *ctx,
                                           const char *path, char *buffer)
 {
     char *dir_name;
-    char *tmp_path = strdup(path);
+    char *tmp_path = g_strdup(path);
     char *base_name = basename(tmp_path);
 
     /* NULL terminate the directory */
@@ -55,7 +55,7 @@ static const char *local_mapped_attr_path(FsContext *ctx,
 
     snprintf(buffer, PATH_MAX, "%s/%s/%s/%s",
              ctx->fs_root, dir_name, VIRTFS_META_DIR, base_name);
-    free(tmp_path);
+    g_free(tmp_path);
     return buffer;
 }
 
@@ -130,7 +130,7 @@ static int local_create_mapped_attr_dir(FsContext *ctx, const char *path)
 {
     int err;
     char attr_dir[PATH_MAX];
-    char *tmp_path = strdup(path);
+    char *tmp_path = g_strdup(path);
 
     snprintf(attr_dir, PATH_MAX, "%s/%s/%s",
              ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR);
@@ -139,7 +139,7 @@ static int local_create_mapped_attr_dir(FsContext *ctx, const char *path)
     if (err < 0 && errno == EEXIST) {
         err = 0;
     }
-    free(tmp_path);
+    g_free(tmp_path);
     return err;
 }
 
commit c64f50d1e250879611e6f328e2c4fb18c8a4ab0c
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:07:57 2013 +0100

    g_strdup(NULL) returns NULL; simplify
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
index 6f427df..6eab7f7 100644
--- a/hw/9pfs/virtio-9p-device.c
+++ b/hw/9pfs/virtio-9p-device.c
@@ -85,11 +85,7 @@ VirtIODevice *virtio_9p_init(DeviceState *dev, V9fsConf *conf)
     }
 
     s->ctx.export_flags = fse->export_flags;
-    if (fse->path) {
-        s->ctx.fs_root = g_strdup(fse->path);
-    } else {
-        s->ctx.fs_root = NULL;
-    }
+    s->ctx.fs_root = g_strdup(fse->path);
     s->ctx.exops.get_st_gen = NULL;
     len = strlen(conf->tag);
     if (len > MAX_TAG_LEN - 1) {
diff --git a/hw/block-common.c b/hw/block-common.c
index 0f1b64e..d21ec3a 100644
--- a/hw/block-common.c
+++ b/hw/block-common.c
@@ -18,9 +18,7 @@ void blkconf_serial(BlockConf *conf, char **serial)
     if (!*serial) {
         /* try to fall back to value set with legacy -drive serial=... */
         dinfo = drive_get_by_blockdev(conf->bs);
-        if (dinfo->serial) {
-            *serial = g_strdup(dinfo->serial);
-        }
+        *serial = g_strdup(dinfo->serial);
     }
 }
 
diff --git a/net/net.c b/net/net.c
index cdd9b04..2f0ab3a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -566,9 +566,7 @@ static int net_init_nic(const NetClientOptions *opts, const char *name,
         assert(peer);
         nd->netdev = peer;
     }
-    if (name) {
-        nd->name = g_strdup(name);
-    }
+    nd->name = g_strdup(name);
     if (nic->has_model) {
         nd->model = g_strdup(nic->model);
     }
diff --git a/slirp/slirp.c b/slirp/slirp.c
index e93b578..0e6e232 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -225,12 +225,8 @@ Slirp *slirp_init(int restricted, struct in_addr vnetwork,
         pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname),
                 vhostname);
     }
-    if (tftp_path) {
-        slirp->tftp_prefix = g_strdup(tftp_path);
-    }
-    if (bootfile) {
-        slirp->bootp_filename = g_strdup(bootfile);
-    }
+    slirp->tftp_prefix = g_strdup(tftp_path);
+    slirp->bootp_filename = g_strdup(bootfile);
     slirp->vdhcp_startaddr = vdhcp_start;
     slirp->vnameserver_addr = vnameserver;
 
diff --git a/util/qemu-option.c b/util/qemu-option.c
index f532b76..c12e724 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -643,9 +643,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value,
         QTAILQ_INSERT_TAIL(&opts->head, opt, next);
     }
     opt->desc = desc;
-    if (value) {
-        opt->str = g_strdup(value);
-    }
+    opt->str = g_strdup(value);
     qemu_opt_parse(opt, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
@@ -792,9 +790,7 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
         }
     }
     opts = g_malloc0(sizeof(*opts));
-    if (id) {
-        opts->id = g_strdup(id);
-    }
+    opts->id = g_strdup(id);
     opts->list = list;
     loc_save(&opts->loc);
     QTAILQ_INIT(&opts->head);
commit 6528499fa4c3ceaec01ad29d8090ef55918ebfb3
Author: Markus Armbruster <armbru at redhat.com>
Date:   Tue Jan 22 11:07:56 2013 +0100

    g_malloc(0) and g_malloc0(0) return NULL; simplify
    
    Once upon a time, it was decided that qemu_malloc(0) should abort.
    Switching to glib retired that bright idea.  Some code that was added
    to cope with it (e.g. in commits 702ef63, b76b6e9) is still around.
    Bury it.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6a95aa6..bc1784c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -737,11 +737,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
      * l1_table_offset when it is the current s->l1_table_offset! Be careful
      * when changing this! */
     if (l1_table_offset != s->l1_table_offset) {
-        if (l1_size2 != 0) {
-            l1_table = g_malloc0(align_offset(l1_size2, 512));
-        } else {
-            l1_table = NULL;
-        }
+        l1_table = g_malloc0(align_offset(l1_size2, 512));
         l1_allocated = 1;
         if (bdrv_pread(bs->file, l1_table_offset,
                        l1_table, l1_size2) != l1_size2)
diff --git a/block/vdi.c b/block/vdi.c
index 257a592..87c691b 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -443,9 +443,7 @@ static int vdi_open(BlockDriverState *bs, int flags)
 
     bmap_size = header.blocks_in_image * sizeof(uint32_t);
     bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
-    if (bmap_size > 0) {
-        s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
-    }
+    s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
     ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
     if (ret < 0) {
         goto fail_free_bmap;
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 0aaf0d2..b795839 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -3101,11 +3101,7 @@ static void v9fs_xattrcreate(void *opaque)
     xattr_fidp->fs.xattr.flags = flags;
     v9fs_string_init(&xattr_fidp->fs.xattr.name);
     v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
-    if (size) {
-        xattr_fidp->fs.xattr.value = g_malloc(size);
-    } else {
-        xattr_fidp->fs.xattr.value = NULL;
-    }
+    xattr_fidp->fs.xattr.value = g_malloc(size);
     err = offset;
     put_fid(pdu, file_fidp);
 out_nofid:
diff --git a/hw/vhost.c b/hw/vhost.c
index cee8aad..0dd2a9a 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -269,11 +269,8 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
     vhost_log_chunk_t *log;
     uint64_t log_base;
     int r, i;
-    if (size) {
-        log = g_malloc0(size * sizeof *log);
-    } else {
-        log = NULL;
-    }
+
+    log = g_malloc0(size * sizeof *log);
     log_base = (uint64_t)(unsigned long)log;
     r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
     assert(r >= 0);
commit 75610155562bd480356a7a5018e5de4c44ec7884
Author: Andreas Färber <andreas.faerber at web.de>
Date:   Sat Jan 26 20:41:58 2013 +0100

    prep: Move PReP machine to hw/ppc/
    
    Signed-off-by: Andreas Färber <andreas.faerber at web.de>

diff --git a/MAINTAINERS b/MAINTAINERS
index 9dd4c20..21043e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -395,7 +395,7 @@ PReP
 M: Andreas Färber <andreas.faerber at web.de>
 L: qemu-ppc at nongnu.org
 S: Odd Fixes
-F: hw/ppc_prep.c
+F: hw/ppc/prep.c
 F: hw/prep_pci.[hc]
 F: hw/pc87312.[hc]
 
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 462146b..f762050 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -2,7 +2,6 @@
 obj-y = ppc.o ppc_booke.o
 # PREP target
 obj-y += mc146818rtc.o
-obj-y += ppc_prep.o
 # IBM pSeries (sPAPR)
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
@@ -24,6 +23,8 @@ obj-y += xilinx_ethlite.o
 
 obj-y := $(addprefix ../,$(obj-y))
 
+# PReP
+obj-y += prep.o
 # OldWorld PowerMac
 obj-y += mac_oldworld.o
 # NewWorld PowerMac
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
new file mode 100644
index 0000000..e06dded
--- /dev/null
+++ b/hw/ppc/prep.c
@@ -0,0 +1,680 @@
+/*
+ * QEMU PPC PREP hardware System Emulator
+ *
+ * Copyright (c) 2003-2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw/hw.h"
+#include "hw/nvram.h"
+#include "hw/pc.h"
+#include "hw/serial.h"
+#include "hw/fdc.h"
+#include "net/net.h"
+#include "sysemu/sysemu.h"
+#include "hw/isa.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/ppc.h"
+#include "hw/boards.h"
+#include "qemu/log.h"
+#include "hw/ide.h"
+#include "hw/loader.h"
+#include "hw/mc146818rtc.h"
+#include "hw/pc87312.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/arch_init.h"
+#include "exec/address-spaces.h"
+
+//#define HARD_DEBUG_PPC_IO
+//#define DEBUG_PPC_IO
+
+/* SMP is not enabled, for now */
+#define MAX_CPUS 1
+
+#define MAX_IDE_BUS 2
+
+#define BIOS_SIZE (1024 * 1024)
+#define BIOS_FILENAME "ppc_rom.bin"
+#define KERNEL_LOAD_ADDR 0x01000000
+#define INITRD_LOAD_ADDR 0x01800000
+
+#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
+#define DEBUG_PPC_IO
+#endif
+
+#if defined (HARD_DEBUG_PPC_IO)
+#define PPC_IO_DPRINTF(fmt, ...)                         \
+do {                                                     \
+    if (qemu_loglevel_mask(CPU_LOG_IOPORT)) {            \
+        qemu_log("%s: " fmt, __func__ , ## __VA_ARGS__); \
+    } else {                                             \
+        printf("%s : " fmt, __func__ , ## __VA_ARGS__);  \
+    }                                                    \
+} while (0)
+#elif defined (DEBUG_PPC_IO)
+#define PPC_IO_DPRINTF(fmt, ...) \
+qemu_log_mask(CPU_LOG_IOPORT, fmt, ## __VA_ARGS__)
+#else
+#define PPC_IO_DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+/* Constants for devices init */
+static const int ide_iobase[2] = { 0x1f0, 0x170 };
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 13, 13 };
+
+#define NE2000_NB_MAX 6
+
+static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
+static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
+
+/* ISA IO ports bridge */
+#define PPC_IO_BASE 0x80000000
+
+/* PowerPC control and status registers */
+#if 0 // Not used
+static struct {
+    /* IDs */
+    uint32_t veni_devi;
+    uint32_t revi;
+    /* Control and status */
+    uint32_t gcsr;
+    uint32_t xcfr;
+    uint32_t ct32;
+    uint32_t mcsr;
+    /* General purpose registers */
+    uint32_t gprg[6];
+    /* Exceptions */
+    uint32_t feen;
+    uint32_t fest;
+    uint32_t fema;
+    uint32_t fecl;
+    uint32_t eeen;
+    uint32_t eest;
+    uint32_t eecl;
+    uint32_t eeint;
+    uint32_t eemck0;
+    uint32_t eemck1;
+    /* Error diagnostic */
+} XCSR;
+
+static void PPC_XCSR_writeb (void *opaque,
+                             hwaddr addr, uint32_t value)
+{
+    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
+           value);
+}
+
+static void PPC_XCSR_writew (void *opaque,
+                             hwaddr addr, uint32_t value)
+{
+    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
+           value);
+}
+
+static void PPC_XCSR_writel (void *opaque,
+                             hwaddr addr, uint32_t value)
+{
+    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
+           value);
+}
+
+static uint32_t PPC_XCSR_readb (void *opaque, hwaddr addr)
+{
+    uint32_t retval = 0;
+
+    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
+           retval);
+
+    return retval;
+}
+
+static uint32_t PPC_XCSR_readw (void *opaque, hwaddr addr)
+{
+    uint32_t retval = 0;
+
+    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
+           retval);
+
+    return retval;
+}
+
+static uint32_t PPC_XCSR_readl (void *opaque, hwaddr addr)
+{
+    uint32_t retval = 0;
+
+    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
+           retval);
+
+    return retval;
+}
+
+static const MemoryRegionOps PPC_XCSR_ops = {
+    .old_mmio = {
+        .read = { PPC_XCSR_readb, PPC_XCSR_readw, PPC_XCSR_readl, },
+        .write = { PPC_XCSR_writeb, PPC_XCSR_writew, PPC_XCSR_writel, },
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+#endif
+
+/* Fake super-io ports for PREP platform (Intel 82378ZB) */
+typedef struct sysctrl_t {
+    qemu_irq reset_irq;
+    M48t59State *nvram;
+    uint8_t state;
+    uint8_t syscontrol;
+    int contiguous_map;
+    int endian;
+} sysctrl_t;
+
+enum {
+    STATE_HARDFILE = 0x01,
+};
+
+static sysctrl_t *sysctrl;
+
+static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
+{
+    sysctrl_t *sysctrl = opaque;
+
+    PPC_IO_DPRINTF("0x%08" PRIx32 " => 0x%02" PRIx32 "\n",
+                   addr - PPC_IO_BASE, val);
+    switch (addr) {
+    case 0x0092:
+        /* Special port 92 */
+        /* Check soft reset asked */
+        if (val & 0x01) {
+            qemu_irq_raise(sysctrl->reset_irq);
+        } else {
+            qemu_irq_lower(sysctrl->reset_irq);
+        }
+        /* Check LE mode */
+        if (val & 0x02) {
+            sysctrl->endian = 1;
+        } else {
+            sysctrl->endian = 0;
+        }
+        break;
+    case 0x0800:
+        /* Motorola CPU configuration register : read-only */
+        break;
+    case 0x0802:
+        /* Motorola base module feature register : read-only */
+        break;
+    case 0x0803:
+        /* Motorola base module status register : read-only */
+        break;
+    case 0x0808:
+        /* Hardfile light register */
+        if (val & 1)
+            sysctrl->state |= STATE_HARDFILE;
+        else
+            sysctrl->state &= ~STATE_HARDFILE;
+        break;
+    case 0x0810:
+        /* Password protect 1 register */
+        if (sysctrl->nvram != NULL)
+            m48t59_toggle_lock(sysctrl->nvram, 1);
+        break;
+    case 0x0812:
+        /* Password protect 2 register */
+        if (sysctrl->nvram != NULL)
+            m48t59_toggle_lock(sysctrl->nvram, 2);
+        break;
+    case 0x0814:
+        /* L2 invalidate register */
+        //        tlb_flush(first_cpu, 1);
+        break;
+    case 0x081C:
+        /* system control register */
+        sysctrl->syscontrol = val & 0x0F;
+        break;
+    case 0x0850:
+        /* I/O map type register */
+        sysctrl->contiguous_map = val & 0x01;
+        break;
+    default:
+        printf("ERROR: unaffected IO port write: %04" PRIx32
+               " => %02" PRIx32"\n", addr, val);
+        break;
+    }
+}
+
+static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
+{
+    sysctrl_t *sysctrl = opaque;
+    uint32_t retval = 0xFF;
+
+    switch (addr) {
+    case 0x0092:
+        /* Special port 92 */
+        retval = 0x00;
+        break;
+    case 0x0800:
+        /* Motorola CPU configuration register */
+        retval = 0xEF; /* MPC750 */
+        break;
+    case 0x0802:
+        /* Motorola Base module feature register */
+        retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
+        break;
+    case 0x0803:
+        /* Motorola base module status register */
+        retval = 0xE0; /* Standard MPC750 */
+        break;
+    case 0x080C:
+        /* Equipment present register:
+         *  no L2 cache
+         *  no upgrade processor
+         *  no cards in PCI slots
+         *  SCSI fuse is bad
+         */
+        retval = 0x3C;
+        break;
+    case 0x0810:
+        /* Motorola base module extended feature register */
+        retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
+        break;
+    case 0x0814:
+        /* L2 invalidate: don't care */
+        break;
+    case 0x0818:
+        /* Keylock */
+        retval = 0x00;
+        break;
+    case 0x081C:
+        /* system control register
+         * 7 - 6 / 1 - 0: L2 cache enable
+         */
+        retval = sysctrl->syscontrol;
+        break;
+    case 0x0823:
+        /* */
+        retval = 0x03; /* no L2 cache */
+        break;
+    case 0x0850:
+        /* I/O map type register */
+        retval = sysctrl->contiguous_map;
+        break;
+    default:
+        printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
+        break;
+    }
+    PPC_IO_DPRINTF("0x%08" PRIx32 " <= 0x%02" PRIx32 "\n",
+                   addr - PPC_IO_BASE, retval);
+
+    return retval;
+}
+
+static inline hwaddr prep_IO_address(sysctrl_t *sysctrl,
+                                                 hwaddr addr)
+{
+    if (sysctrl->contiguous_map == 0) {
+        /* 64 KB contiguous space for IOs */
+        addr &= 0xFFFF;
+    } else {
+        /* 8 MB non-contiguous space for IOs */
+        addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7);
+    }
+
+    return addr;
+}
+
+static void PPC_prep_io_writeb (void *opaque, hwaddr addr,
+                                uint32_t value)
+{
+    sysctrl_t *sysctrl = opaque;
+
+    addr = prep_IO_address(sysctrl, addr);
+    cpu_outb(addr, value);
+}
+
+static uint32_t PPC_prep_io_readb (void *opaque, hwaddr addr)
+{
+    sysctrl_t *sysctrl = opaque;
+    uint32_t ret;
+
+    addr = prep_IO_address(sysctrl, addr);
+    ret = cpu_inb(addr);
+
+    return ret;
+}
+
+static void PPC_prep_io_writew (void *opaque, hwaddr addr,
+                                uint32_t value)
+{
+    sysctrl_t *sysctrl = opaque;
+
+    addr = prep_IO_address(sysctrl, addr);
+    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", addr, value);
+    cpu_outw(addr, value);
+}
+
+static uint32_t PPC_prep_io_readw (void *opaque, hwaddr addr)
+{
+    sysctrl_t *sysctrl = opaque;
+    uint32_t ret;
+
+    addr = prep_IO_address(sysctrl, addr);
+    ret = cpu_inw(addr);
+    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " <= 0x%08" PRIx32 "\n", addr, ret);
+
+    return ret;
+}
+
+static void PPC_prep_io_writel (void *opaque, hwaddr addr,
+                                uint32_t value)
+{
+    sysctrl_t *sysctrl = opaque;
+
+    addr = prep_IO_address(sysctrl, addr);
+    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", addr, value);
+    cpu_outl(addr, value);
+}
+
+static uint32_t PPC_prep_io_readl (void *opaque, hwaddr addr)
+{
+    sysctrl_t *sysctrl = opaque;
+    uint32_t ret;
+
+    addr = prep_IO_address(sysctrl, addr);
+    ret = cpu_inl(addr);
+    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " <= 0x%08" PRIx32 "\n", addr, ret);
+
+    return ret;
+}
+
+static const MemoryRegionOps PPC_prep_io_ops = {
+    .old_mmio = {
+        .read = { PPC_prep_io_readb, PPC_prep_io_readw, PPC_prep_io_readl },
+        .write = { PPC_prep_io_writeb, PPC_prep_io_writew, PPC_prep_io_writel },
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+#define NVRAM_SIZE        0x2000
+
+static void cpu_request_exit(void *opaque, int irq, int level)
+{
+    CPUPPCState *env = cpu_single_env;
+
+    if (env && level) {
+        cpu_exit(env);
+    }
+}
+
+static void ppc_prep_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+
+    cpu_reset(CPU(cpu));
+}
+
+/* PowerPC PREP hardware initialisation */
+static void ppc_prep_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
+    MemoryRegion *sysmem = get_system_memory();
+    PowerPCCPU *cpu = NULL;
+    CPUPPCState *env = NULL;
+    char *filename;
+    nvram_t nvram;
+    M48t59State *m48t59;
+    MemoryRegion *PPC_io_memory = g_new(MemoryRegion, 1);
+#if 0
+    MemoryRegion *xcsr = g_new(MemoryRegion, 1);
+#endif
+    int linux_boot, i, nb_nics1, bios_size;
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    MemoryRegion *bios = g_new(MemoryRegion, 1);
+    uint32_t kernel_base, initrd_base;
+    long kernel_size, initrd_size;
+    DeviceState *dev;
+    PCIHostState *pcihost;
+    PCIBus *pci_bus;
+    PCIDevice *pci;
+    ISABus *isa_bus;
+    ISADevice *isa;
+    qemu_irq *cpu_exit_irq;
+    int ppc_boot_device;
+    DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+
+    sysctrl = g_malloc0(sizeof(sysctrl_t));
+
+    linux_boot = (kernel_filename != NULL);
+
+    /* init CPUs */
+    if (cpu_model == NULL)
+        cpu_model = "602";
+    for (i = 0; i < smp_cpus; i++) {
+        cpu = cpu_ppc_init(cpu_model);
+        if (cpu == NULL) {
+            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
+            exit(1);
+        }
+        env = &cpu->env;
+
+        if (env->flags & POWERPC_FLAG_RTC_CLK) {
+            /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
+            cpu_ppc_tb_init(env, 7812500UL);
+        } else {
+            /* Set time-base frequency to 100 Mhz */
+            cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
+        }
+        qemu_register_reset(ppc_prep_reset, cpu);
+    }
+
+    /* allocate RAM */
+    memory_region_init_ram(ram, "ppc_prep.ram", ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(sysmem, 0, ram);
+
+    /* allocate and load BIOS */
+    memory_region_init_ram(bios, "ppc_prep.bios", BIOS_SIZE);
+    memory_region_set_readonly(bios, true);
+    memory_region_add_subregion(sysmem, (uint32_t)(-BIOS_SIZE), bios);
+    vmstate_register_ram_global(bios);
+    if (bios_name == NULL)
+        bios_name = BIOS_FILENAME;
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (filename) {
+        bios_size = get_image_size(filename);
+    } else {
+        bios_size = -1;
+    }
+    if (bios_size > 0 && bios_size <= BIOS_SIZE) {
+        hwaddr bios_addr;
+        bios_size = (bios_size + 0xfff) & ~0xfff;
+        bios_addr = (uint32_t)(-bios_size);
+        bios_size = load_image_targphys(filename, bios_addr, bios_size);
+    }
+    if (bios_size < 0 || bios_size > BIOS_SIZE) {
+        hw_error("qemu: could not load PPC PREP bios '%s'\n", bios_name);
+    }
+    if (filename) {
+        g_free(filename);
+    }
+
+    if (linux_boot) {
+        kernel_base = KERNEL_LOAD_ADDR;
+        /* now we can load the kernel */
+        kernel_size = load_image_targphys(kernel_filename, kernel_base,
+                                          ram_size - kernel_base);
+        if (kernel_size < 0) {
+            hw_error("qemu: could not load kernel '%s'\n", kernel_filename);
+            exit(1);
+        }
+        /* load initrd */
+        if (initrd_filename) {
+            initrd_base = INITRD_LOAD_ADDR;
+            initrd_size = load_image_targphys(initrd_filename, initrd_base,
+                                              ram_size - initrd_base);
+            if (initrd_size < 0) {
+                hw_error("qemu: could not load initial ram disk '%s'\n",
+                          initrd_filename);
+            }
+        } else {
+            initrd_base = 0;
+            initrd_size = 0;
+        }
+        ppc_boot_device = 'm';
+    } else {
+        kernel_base = 0;
+        kernel_size = 0;
+        initrd_base = 0;
+        initrd_size = 0;
+        ppc_boot_device = '\0';
+        /* For now, OHW cannot boot from the network. */
+        for (i = 0; boot_device[i] != '\0'; i++) {
+            if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
+                ppc_boot_device = boot_device[i];
+                break;
+            }
+        }
+        if (ppc_boot_device == '\0') {
+            fprintf(stderr, "No valid boot device for Mac99 machine\n");
+            exit(1);
+        }
+    }
+
+    if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
+        hw_error("Only 6xx bus is supported on PREP machine\n");
+    }
+
+    dev = qdev_create(NULL, "raven-pcihost");
+    pcihost = PCI_HOST_BRIDGE(dev);
+    pcihost->address_space = get_system_memory();
+    object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+    pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
+    if (pci_bus == NULL) {
+        fprintf(stderr, "Couldn't create PCI host controller.\n");
+        exit(1);
+    }
+
+    /* PCI -> ISA bridge */
+    pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
+    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
+    qdev_connect_gpio_out(&pci->qdev, 0,
+                          first_cpu->irq_inputs[PPC6xx_INPUT_INT]);
+    qdev_connect_gpio_out(&pci->qdev, 1, *cpu_exit_irq);
+    sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
+    sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
+    sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
+    sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11));
+    isa_bus = DO_UPCAST(ISABus, qbus, qdev_get_child_bus(&pci->qdev, "isa.0"));
+
+    /* Super I/O (parallel + serial ports) */
+    isa = isa_create(isa_bus, TYPE_PC87312);
+    qdev_prop_set_uint8(&isa->qdev, "config", 13); /* fdc, ser0, ser1, par0 */
+    qdev_init_nofail(&isa->qdev);
+
+    /* Register 8 MB of ISA IO space (needed for non-contiguous map) */
+    memory_region_init_io(PPC_io_memory, &PPC_prep_io_ops, sysctrl,
+                          "ppc-io", 0x00800000);
+    memory_region_add_subregion(sysmem, 0x80000000, PPC_io_memory);
+
+    /* init basic PC hardware */
+    pci_vga_init(pci_bus);
+
+    nb_nics1 = nb_nics;
+    if (nb_nics1 > NE2000_NB_MAX)
+        nb_nics1 = NE2000_NB_MAX;
+    for(i = 0; i < nb_nics1; i++) {
+        if (nd_table[i].model == NULL) {
+	    nd_table[i].model = g_strdup("ne2k_isa");
+        }
+        if (strcmp(nd_table[i].model, "ne2k_isa") == 0) {
+            isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i],
+                            &nd_table[i]);
+        } else {
+            pci_nic_init_nofail(&nd_table[i], "ne2k_pci", NULL);
+        }
+    }
+
+    ide_drive_get(hd, MAX_IDE_BUS);
+    for(i = 0; i < MAX_IDE_BUS; i++) {
+        isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i],
+                     hd[2 * i],
+		     hd[2 * i + 1]);
+    }
+    isa_create_simple(isa_bus, "i8042");
+
+    sysctrl->reset_irq = first_cpu->irq_inputs[PPC6xx_INPUT_HRESET];
+    /* System control ports */
+    register_ioport_read(0x0092, 0x01, 1, &PREP_io_800_readb, sysctrl);
+    register_ioport_write(0x0092, 0x01, 1, &PREP_io_800_writeb, sysctrl);
+    register_ioport_read(0x0800, 0x52, 1, &PREP_io_800_readb, sysctrl);
+    register_ioport_write(0x0800, 0x52, 1, &PREP_io_800_writeb, sysctrl);
+    /* PowerPC control and status register group */
+#if 0
+    memory_region_init_io(xcsr, &PPC_XCSR_ops, NULL, "ppc-xcsr", 0x1000);
+    memory_region_add_subregion(sysmem, 0xFEFF0000, xcsr);
+#endif
+
+    if (usb_enabled(false)) {
+        pci_create_simple(pci_bus, -1, "pci-ohci");
+    }
+
+    m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 59);
+    if (m48t59 == NULL)
+        return;
+    sysctrl->nvram = m48t59;
+
+    /* Initialise NVRAM */
+    nvram.opaque = m48t59;
+    nvram.read_fn = &m48t59_read;
+    nvram.write_fn = &m48t59_write;
+    PPC_NVRAM_set_params(&nvram, NVRAM_SIZE, "PREP", ram_size, ppc_boot_device,
+                         kernel_base, kernel_size,
+                         kernel_cmdline,
+                         initrd_base, initrd_size,
+                         /* XXX: need an option to load a NVRAM image */
+                         0,
+                         graphic_width, graphic_height, graphic_depth);
+
+    /* Special port to get debug messages from Open-Firmware */
+    register_ioport_write(0x0F00, 4, 1, &PPC_debug_write, NULL);
+
+    /* Initialize audio subsystem */
+    audio_init(isa_bus, pci_bus);
+}
+
+static QEMUMachine prep_machine = {
+    .name = "prep",
+    .desc = "PowerPC PREP platform",
+    .init = ppc_prep_init,
+    .max_cpus = MAX_CPUS,
+    DEFAULT_MACHINE_OPTIONS,
+};
+
+static void prep_machine_init(void)
+{
+    qemu_register_machine(&prep_machine);
+}
+
+machine_init(prep_machine_init);
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
deleted file mode 100644
index a35fbed..0000000
--- a/hw/ppc_prep.c
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- * QEMU PPC PREP hardware System Emulator
- *
- * Copyright (c) 2003-2007 Jocelyn Mayer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "hw.h"
-#include "nvram.h"
-#include "pc.h"
-#include "serial.h"
-#include "fdc.h"
-#include "net/net.h"
-#include "sysemu/sysemu.h"
-#include "isa.h"
-#include "pci/pci.h"
-#include "pci/pci_host.h"
-#include "ppc.h"
-#include "boards.h"
-#include "qemu/log.h"
-#include "ide.h"
-#include "loader.h"
-#include "mc146818rtc.h"
-#include "pc87312.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/arch_init.h"
-#include "exec/address-spaces.h"
-
-//#define HARD_DEBUG_PPC_IO
-//#define DEBUG_PPC_IO
-
-/* SMP is not enabled, for now */
-#define MAX_CPUS 1
-
-#define MAX_IDE_BUS 2
-
-#define BIOS_SIZE (1024 * 1024)
-#define BIOS_FILENAME "ppc_rom.bin"
-#define KERNEL_LOAD_ADDR 0x01000000
-#define INITRD_LOAD_ADDR 0x01800000
-
-#if defined (HARD_DEBUG_PPC_IO) && !defined (DEBUG_PPC_IO)
-#define DEBUG_PPC_IO
-#endif
-
-#if defined (HARD_DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...)                         \
-do {                                                     \
-    if (qemu_loglevel_mask(CPU_LOG_IOPORT)) {            \
-        qemu_log("%s: " fmt, __func__ , ## __VA_ARGS__); \
-    } else {                                             \
-        printf("%s : " fmt, __func__ , ## __VA_ARGS__);  \
-    }                                                    \
-} while (0)
-#elif defined (DEBUG_PPC_IO)
-#define PPC_IO_DPRINTF(fmt, ...) \
-qemu_log_mask(CPU_LOG_IOPORT, fmt, ## __VA_ARGS__)
-#else
-#define PPC_IO_DPRINTF(fmt, ...) do { } while (0)
-#endif
-
-/* Constants for devices init */
-static const int ide_iobase[2] = { 0x1f0, 0x170 };
-static const int ide_iobase2[2] = { 0x3f6, 0x376 };
-static const int ide_irq[2] = { 13, 13 };
-
-#define NE2000_NB_MAX 6
-
-static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
-static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
-
-/* ISA IO ports bridge */
-#define PPC_IO_BASE 0x80000000
-
-/* PowerPC control and status registers */
-#if 0 // Not used
-static struct {
-    /* IDs */
-    uint32_t veni_devi;
-    uint32_t revi;
-    /* Control and status */
-    uint32_t gcsr;
-    uint32_t xcfr;
-    uint32_t ct32;
-    uint32_t mcsr;
-    /* General purpose registers */
-    uint32_t gprg[6];
-    /* Exceptions */
-    uint32_t feen;
-    uint32_t fest;
-    uint32_t fema;
-    uint32_t fecl;
-    uint32_t eeen;
-    uint32_t eest;
-    uint32_t eecl;
-    uint32_t eeint;
-    uint32_t eemck0;
-    uint32_t eemck1;
-    /* Error diagnostic */
-} XCSR;
-
-static void PPC_XCSR_writeb (void *opaque,
-                             hwaddr addr, uint32_t value)
-{
-    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
-           value);
-}
-
-static void PPC_XCSR_writew (void *opaque,
-                             hwaddr addr, uint32_t value)
-{
-    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
-           value);
-}
-
-static void PPC_XCSR_writel (void *opaque,
-                             hwaddr addr, uint32_t value)
-{
-    printf("%s: 0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", __func__, addr,
-           value);
-}
-
-static uint32_t PPC_XCSR_readb (void *opaque, hwaddr addr)
-{
-    uint32_t retval = 0;
-
-    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
-           retval);
-
-    return retval;
-}
-
-static uint32_t PPC_XCSR_readw (void *opaque, hwaddr addr)
-{
-    uint32_t retval = 0;
-
-    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
-           retval);
-
-    return retval;
-}
-
-static uint32_t PPC_XCSR_readl (void *opaque, hwaddr addr)
-{
-    uint32_t retval = 0;
-
-    printf("%s: 0x" TARGET_FMT_plx " <= %08" PRIx32 "\n", __func__, addr,
-           retval);
-
-    return retval;
-}
-
-static const MemoryRegionOps PPC_XCSR_ops = {
-    .old_mmio = {
-        .read = { PPC_XCSR_readb, PPC_XCSR_readw, PPC_XCSR_readl, },
-        .write = { PPC_XCSR_writeb, PPC_XCSR_writew, PPC_XCSR_writel, },
-    },
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-#endif
-
-/* Fake super-io ports for PREP platform (Intel 82378ZB) */
-typedef struct sysctrl_t {
-    qemu_irq reset_irq;
-    M48t59State *nvram;
-    uint8_t state;
-    uint8_t syscontrol;
-    int contiguous_map;
-    int endian;
-} sysctrl_t;
-
-enum {
-    STATE_HARDFILE = 0x01,
-};
-
-static sysctrl_t *sysctrl;
-
-static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
-{
-    sysctrl_t *sysctrl = opaque;
-
-    PPC_IO_DPRINTF("0x%08" PRIx32 " => 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, val);
-    switch (addr) {
-    case 0x0092:
-        /* Special port 92 */
-        /* Check soft reset asked */
-        if (val & 0x01) {
-            qemu_irq_raise(sysctrl->reset_irq);
-        } else {
-            qemu_irq_lower(sysctrl->reset_irq);
-        }
-        /* Check LE mode */
-        if (val & 0x02) {
-            sysctrl->endian = 1;
-        } else {
-            sysctrl->endian = 0;
-        }
-        break;
-    case 0x0800:
-        /* Motorola CPU configuration register : read-only */
-        break;
-    case 0x0802:
-        /* Motorola base module feature register : read-only */
-        break;
-    case 0x0803:
-        /* Motorola base module status register : read-only */
-        break;
-    case 0x0808:
-        /* Hardfile light register */
-        if (val & 1)
-            sysctrl->state |= STATE_HARDFILE;
-        else
-            sysctrl->state &= ~STATE_HARDFILE;
-        break;
-    case 0x0810:
-        /* Password protect 1 register */
-        if (sysctrl->nvram != NULL)
-            m48t59_toggle_lock(sysctrl->nvram, 1);
-        break;
-    case 0x0812:
-        /* Password protect 2 register */
-        if (sysctrl->nvram != NULL)
-            m48t59_toggle_lock(sysctrl->nvram, 2);
-        break;
-    case 0x0814:
-        /* L2 invalidate register */
-        //        tlb_flush(first_cpu, 1);
-        break;
-    case 0x081C:
-        /* system control register */
-        sysctrl->syscontrol = val & 0x0F;
-        break;
-    case 0x0850:
-        /* I/O map type register */
-        sysctrl->contiguous_map = val & 0x01;
-        break;
-    default:
-        printf("ERROR: unaffected IO port write: %04" PRIx32
-               " => %02" PRIx32"\n", addr, val);
-        break;
-    }
-}
-
-static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
-{
-    sysctrl_t *sysctrl = opaque;
-    uint32_t retval = 0xFF;
-
-    switch (addr) {
-    case 0x0092:
-        /* Special port 92 */
-        retval = 0x00;
-        break;
-    case 0x0800:
-        /* Motorola CPU configuration register */
-        retval = 0xEF; /* MPC750 */
-        break;
-    case 0x0802:
-        /* Motorola Base module feature register */
-        retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
-        break;
-    case 0x0803:
-        /* Motorola base module status register */
-        retval = 0xE0; /* Standard MPC750 */
-        break;
-    case 0x080C:
-        /* Equipment present register:
-         *  no L2 cache
-         *  no upgrade processor
-         *  no cards in PCI slots
-         *  SCSI fuse is bad
-         */
-        retval = 0x3C;
-        break;
-    case 0x0810:
-        /* Motorola base module extended feature register */
-        retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
-        break;
-    case 0x0814:
-        /* L2 invalidate: don't care */
-        break;
-    case 0x0818:
-        /* Keylock */
-        retval = 0x00;
-        break;
-    case 0x081C:
-        /* system control register
-         * 7 - 6 / 1 - 0: L2 cache enable
-         */
-        retval = sysctrl->syscontrol;
-        break;
-    case 0x0823:
-        /* */
-        retval = 0x03; /* no L2 cache */
-        break;
-    case 0x0850:
-        /* I/O map type register */
-        retval = sysctrl->contiguous_map;
-        break;
-    default:
-        printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
-        break;
-    }
-    PPC_IO_DPRINTF("0x%08" PRIx32 " <= 0x%02" PRIx32 "\n",
-                   addr - PPC_IO_BASE, retval);
-
-    return retval;
-}
-
-static inline hwaddr prep_IO_address(sysctrl_t *sysctrl,
-                                                 hwaddr addr)
-{
-    if (sysctrl->contiguous_map == 0) {
-        /* 64 KB contiguous space for IOs */
-        addr &= 0xFFFF;
-    } else {
-        /* 8 MB non-contiguous space for IOs */
-        addr = (addr & 0x1F) | ((addr & 0x007FFF000) >> 7);
-    }
-
-    return addr;
-}
-
-static void PPC_prep_io_writeb (void *opaque, hwaddr addr,
-                                uint32_t value)
-{
-    sysctrl_t *sysctrl = opaque;
-
-    addr = prep_IO_address(sysctrl, addr);
-    cpu_outb(addr, value);
-}
-
-static uint32_t PPC_prep_io_readb (void *opaque, hwaddr addr)
-{
-    sysctrl_t *sysctrl = opaque;
-    uint32_t ret;
-
-    addr = prep_IO_address(sysctrl, addr);
-    ret = cpu_inb(addr);
-
-    return ret;
-}
-
-static void PPC_prep_io_writew (void *opaque, hwaddr addr,
-                                uint32_t value)
-{
-    sysctrl_t *sysctrl = opaque;
-
-    addr = prep_IO_address(sysctrl, addr);
-    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", addr, value);
-    cpu_outw(addr, value);
-}
-
-static uint32_t PPC_prep_io_readw (void *opaque, hwaddr addr)
-{
-    sysctrl_t *sysctrl = opaque;
-    uint32_t ret;
-
-    addr = prep_IO_address(sysctrl, addr);
-    ret = cpu_inw(addr);
-    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " <= 0x%08" PRIx32 "\n", addr, ret);
-
-    return ret;
-}
-
-static void PPC_prep_io_writel (void *opaque, hwaddr addr,
-                                uint32_t value)
-{
-    sysctrl_t *sysctrl = opaque;
-
-    addr = prep_IO_address(sysctrl, addr);
-    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " => 0x%08" PRIx32 "\n", addr, value);
-    cpu_outl(addr, value);
-}
-
-static uint32_t PPC_prep_io_readl (void *opaque, hwaddr addr)
-{
-    sysctrl_t *sysctrl = opaque;
-    uint32_t ret;
-
-    addr = prep_IO_address(sysctrl, addr);
-    ret = cpu_inl(addr);
-    PPC_IO_DPRINTF("0x" TARGET_FMT_plx " <= 0x%08" PRIx32 "\n", addr, ret);
-
-    return ret;
-}
-
-static const MemoryRegionOps PPC_prep_io_ops = {
-    .old_mmio = {
-        .read = { PPC_prep_io_readb, PPC_prep_io_readw, PPC_prep_io_readl },
-        .write = { PPC_prep_io_writeb, PPC_prep_io_writew, PPC_prep_io_writel },
-    },
-    .endianness = DEVICE_LITTLE_ENDIAN,
-};
-
-#define NVRAM_SIZE        0x2000
-
-static void cpu_request_exit(void *opaque, int irq, int level)
-{
-    CPUPPCState *env = cpu_single_env;
-
-    if (env && level) {
-        cpu_exit(env);
-    }
-}
-
-static void ppc_prep_reset(void *opaque)
-{
-    PowerPCCPU *cpu = opaque;
-
-    cpu_reset(CPU(cpu));
-}
-
-/* PowerPC PREP hardware initialisation */
-static void ppc_prep_init(QEMUMachineInitArgs *args)
-{
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    MemoryRegion *sysmem = get_system_memory();
-    PowerPCCPU *cpu = NULL;
-    CPUPPCState *env = NULL;
-    char *filename;
-    nvram_t nvram;
-    M48t59State *m48t59;
-    MemoryRegion *PPC_io_memory = g_new(MemoryRegion, 1);
-#if 0
-    MemoryRegion *xcsr = g_new(MemoryRegion, 1);
-#endif
-    int linux_boot, i, nb_nics1, bios_size;
-    MemoryRegion *ram = g_new(MemoryRegion, 1);
-    MemoryRegion *bios = g_new(MemoryRegion, 1);
-    uint32_t kernel_base, initrd_base;
-    long kernel_size, initrd_size;
-    DeviceState *dev;
-    PCIHostState *pcihost;
-    PCIBus *pci_bus;
-    PCIDevice *pci;
-    ISABus *isa_bus;
-    ISADevice *isa;
-    qemu_irq *cpu_exit_irq;
-    int ppc_boot_device;
-    DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
-
-    sysctrl = g_malloc0(sizeof(sysctrl_t));
-
-    linux_boot = (kernel_filename != NULL);
-
-    /* init CPUs */
-    if (cpu_model == NULL)
-        cpu_model = "602";
-    for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(cpu_model);
-        if (cpu == NULL) {
-            fprintf(stderr, "Unable to find PowerPC CPU definition\n");
-            exit(1);
-        }
-        env = &cpu->env;
-
-        if (env->flags & POWERPC_FLAG_RTC_CLK) {
-            /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
-            cpu_ppc_tb_init(env, 7812500UL);
-        } else {
-            /* Set time-base frequency to 100 Mhz */
-            cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
-        }
-        qemu_register_reset(ppc_prep_reset, cpu);
-    }
-
-    /* allocate RAM */
-    memory_region_init_ram(ram, "ppc_prep.ram", ram_size);
-    vmstate_register_ram_global(ram);
-    memory_region_add_subregion(sysmem, 0, ram);
-
-    /* allocate and load BIOS */
-    memory_region_init_ram(bios, "ppc_prep.bios", BIOS_SIZE);
-    memory_region_set_readonly(bios, true);
-    memory_region_add_subregion(sysmem, (uint32_t)(-BIOS_SIZE), bios);
-    vmstate_register_ram_global(bios);
-    if (bios_name == NULL)
-        bios_name = BIOS_FILENAME;
-    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
-    if (filename) {
-        bios_size = get_image_size(filename);
-    } else {
-        bios_size = -1;
-    }
-    if (bios_size > 0 && bios_size <= BIOS_SIZE) {
-        hwaddr bios_addr;
-        bios_size = (bios_size + 0xfff) & ~0xfff;
-        bios_addr = (uint32_t)(-bios_size);
-        bios_size = load_image_targphys(filename, bios_addr, bios_size);
-    }
-    if (bios_size < 0 || bios_size > BIOS_SIZE) {
-        hw_error("qemu: could not load PPC PREP bios '%s'\n", bios_name);
-    }
-    if (filename) {
-        g_free(filename);
-    }
-
-    if (linux_boot) {
-        kernel_base = KERNEL_LOAD_ADDR;
-        /* now we can load the kernel */
-        kernel_size = load_image_targphys(kernel_filename, kernel_base,
-                                          ram_size - kernel_base);
-        if (kernel_size < 0) {
-            hw_error("qemu: could not load kernel '%s'\n", kernel_filename);
-            exit(1);
-        }
-        /* load initrd */
-        if (initrd_filename) {
-            initrd_base = INITRD_LOAD_ADDR;
-            initrd_size = load_image_targphys(initrd_filename, initrd_base,
-                                              ram_size - initrd_base);
-            if (initrd_size < 0) {
-                hw_error("qemu: could not load initial ram disk '%s'\n",
-                          initrd_filename);
-            }
-        } else {
-            initrd_base = 0;
-            initrd_size = 0;
-        }
-        ppc_boot_device = 'm';
-    } else {
-        kernel_base = 0;
-        kernel_size = 0;
-        initrd_base = 0;
-        initrd_size = 0;
-        ppc_boot_device = '\0';
-        /* For now, OHW cannot boot from the network. */
-        for (i = 0; boot_device[i] != '\0'; i++) {
-            if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
-                ppc_boot_device = boot_device[i];
-                break;
-            }
-        }
-        if (ppc_boot_device == '\0') {
-            fprintf(stderr, "No valid boot device for Mac99 machine\n");
-            exit(1);
-        }
-    }
-
-    if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
-        hw_error("Only 6xx bus is supported on PREP machine\n");
-    }
-
-    dev = qdev_create(NULL, "raven-pcihost");
-    pcihost = PCI_HOST_BRIDGE(dev);
-    pcihost->address_space = get_system_memory();
-    object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL);
-    qdev_init_nofail(dev);
-    pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
-    if (pci_bus == NULL) {
-        fprintf(stderr, "Couldn't create PCI host controller.\n");
-        exit(1);
-    }
-
-    /* PCI -> ISA bridge */
-    pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
-    cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
-    qdev_connect_gpio_out(&pci->qdev, 0,
-                          first_cpu->irq_inputs[PPC6xx_INPUT_INT]);
-    qdev_connect_gpio_out(&pci->qdev, 1, *cpu_exit_irq);
-    sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
-    sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
-    sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
-    sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11));
-    isa_bus = DO_UPCAST(ISABus, qbus, qdev_get_child_bus(&pci->qdev, "isa.0"));
-
-    /* Super I/O (parallel + serial ports) */
-    isa = isa_create(isa_bus, TYPE_PC87312);
-    qdev_prop_set_uint8(&isa->qdev, "config", 13); /* fdc, ser0, ser1, par0 */
-    qdev_init_nofail(&isa->qdev);
-
-    /* Register 8 MB of ISA IO space (needed for non-contiguous map) */
-    memory_region_init_io(PPC_io_memory, &PPC_prep_io_ops, sysctrl,
-                          "ppc-io", 0x00800000);
-    memory_region_add_subregion(sysmem, 0x80000000, PPC_io_memory);
-
-    /* init basic PC hardware */
-    pci_vga_init(pci_bus);
-
-    nb_nics1 = nb_nics;
-    if (nb_nics1 > NE2000_NB_MAX)
-        nb_nics1 = NE2000_NB_MAX;
-    for(i = 0; i < nb_nics1; i++) {
-        if (nd_table[i].model == NULL) {
-	    nd_table[i].model = g_strdup("ne2k_isa");
-        }
-        if (strcmp(nd_table[i].model, "ne2k_isa") == 0) {
-            isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i],
-                            &nd_table[i]);
-        } else {
-            pci_nic_init_nofail(&nd_table[i], "ne2k_pci", NULL);
-        }
-    }
-
-    ide_drive_get(hd, MAX_IDE_BUS);
-    for(i = 0; i < MAX_IDE_BUS; i++) {
-        isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i],
-                     hd[2 * i],
-		     hd[2 * i + 1]);
-    }
-    isa_create_simple(isa_bus, "i8042");
-
-    sysctrl->reset_irq = first_cpu->irq_inputs[PPC6xx_INPUT_HRESET];
-    /* System control ports */
-    register_ioport_read(0x0092, 0x01, 1, &PREP_io_800_readb, sysctrl);
-    register_ioport_write(0x0092, 0x01, 1, &PREP_io_800_writeb, sysctrl);
-    register_ioport_read(0x0800, 0x52, 1, &PREP_io_800_readb, sysctrl);
-    register_ioport_write(0x0800, 0x52, 1, &PREP_io_800_writeb, sysctrl);
-    /* PowerPC control and status register group */
-#if 0
-    memory_region_init_io(xcsr, &PPC_XCSR_ops, NULL, "ppc-xcsr", 0x1000);
-    memory_region_add_subregion(sysmem, 0xFEFF0000, xcsr);
-#endif
-
-    if (usb_enabled(false)) {
-        pci_create_simple(pci_bus, -1, "pci-ohci");
-    }
-
-    m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 59);
-    if (m48t59 == NULL)
-        return;
-    sysctrl->nvram = m48t59;
-
-    /* Initialise NVRAM */
-    nvram.opaque = m48t59;
-    nvram.read_fn = &m48t59_read;
-    nvram.write_fn = &m48t59_write;
-    PPC_NVRAM_set_params(&nvram, NVRAM_SIZE, "PREP", ram_size, ppc_boot_device,
-                         kernel_base, kernel_size,
-                         kernel_cmdline,
-                         initrd_base, initrd_size,
-                         /* XXX: need an option to load a NVRAM image */
-                         0,
-                         graphic_width, graphic_height, graphic_depth);
-
-    /* Special port to get debug messages from Open-Firmware */
-    register_ioport_write(0x0F00, 4, 1, &PPC_debug_write, NULL);
-
-    /* Initialize audio subsystem */
-    audio_init(isa_bus, pci_bus);
-}
-
-static QEMUMachine prep_machine = {
-    .name = "prep",
-    .desc = "PowerPC PREP platform",
-    .init = ppc_prep_init,
-    .max_cpus = MAX_CPUS,
-    DEFAULT_MACHINE_OPTIONS,
-};
-
-static void prep_machine_init(void)
-{
-    qemu_register_machine(&prep_machine);
-}
-
-machine_init(prep_machine_init);
commit 74cef80c473fe4ce195d5280a34bb2af8492aabb
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Sat Jan 26 12:54:35 2013 -0800

    xilinx_axidma: Fix debug mode compile messages
    
    Missing cast on one of the conditionally compiled printfs.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/xilinx_axidma.c b/hw/xilinx_axidma.c
index d0ee566..cc51584 100644
--- a/hw/xilinx_axidma.c
+++ b/hw/xilinx_axidma.c
@@ -444,7 +444,7 @@ static void axidma_write(void *opaque, hwaddr addr,
             break;
         default:
             D(qemu_log("%s: ch=%d addr=" TARGET_FMT_plx " v=%x\n",
-                  __func__, sid, addr * 4, value));
+                  __func__, sid, addr * 4, (unsigned)value));
             s->regs[addr] = value;
             break;
     }
commit 080251a46493e4e4cb6cbd6e2878d31c7cf023c5
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Sat Jan 26 12:54:34 2013 -0800

    cadence_gem: Debug mode compile fixes
    
    Some printfs are throwing warnings when debug mode is enabled. Fixed.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/cadence_gem.c b/hw/cadence_gem.c
index 0d83442..b77423d 100644
--- a/hw/cadence_gem.c
+++ b/hw/cadence_gem.c
@@ -687,14 +687,15 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 
     packet_desc_addr = s->rx_desc_addr;
     while (1) {
-        DB_PRINT("read descriptor 0x%x\n", packet_desc_addr);
+        DB_PRINT("read descriptor 0x%x\n", (unsigned)packet_desc_addr);
         /* read current descriptor */
         cpu_physical_memory_read(packet_desc_addr,
                                  (uint8_t *)&desc[0], sizeof(desc));
 
         /* Descriptor owned by software ? */
         if (rx_desc_get_ownership(desc) == 1) {
-            DB_PRINT("descriptor 0x%x owned by sw.\n", packet_desc_addr);
+            DB_PRINT("descriptor 0x%x owned by sw.\n",
+                     (unsigned)packet_desc_addr);
             s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF;
             /* Handle interrupt consequences */
             gem_update_int_status(s);
@@ -709,7 +710,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
          */
         if (rx_desc_get_buffer(desc) == 0) {
             DB_PRINT("Invalid RX buffer (NULL) for descriptor 0x%x\n",
-                       packet_desc_addr);
+                     (unsigned)packet_desc_addr);
             break;
         }
 
@@ -749,7 +750,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size)
         s->rx_desc_addr += 8;
     }
 
-    DB_PRINT("set SOF, OWN on descriptor 0x%08x\n", packet_desc_addr);
+    DB_PRINT("set SOF, OWN on descriptor 0x%08x\n", (unsigned)packet_desc_addr);
 
     /* Count it */
     gem_receive_updatestats(s, buf, size);
@@ -861,7 +862,8 @@ static void gem_transmit(GemState *s)
          */
         if ((tx_desc_get_buffer(desc) == 0) ||
             (tx_desc_get_length(desc) == 0)) {
-            DB_PRINT("Invalid TX descriptor @ 0x%x\n", packet_desc_addr);
+            DB_PRINT("Invalid TX descriptor @ 0x%x\n",
+                     (unsigned)packet_desc_addr);
             break;
         }
 
@@ -1031,10 +1033,11 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size)
     offset >>= 2;
     retval = s->regs[offset];
 
-    DB_PRINT("offset: 0x%04x read: 0x%08x\n", offset*4, retval);
+    DB_PRINT("offset: 0x%04x read: 0x%08x\n", (unsigned)offset*4, retval);
 
     switch (offset) {
     case GEM_ISR:
+        DB_PRINT("lowering irq on ISR read\n");
         qemu_set_irq(s->irq, 0);
         break;
     case GEM_PHYMNTNC:
@@ -1073,7 +1076,7 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val,
     GemState *s = (GemState *)opaque;
     uint32_t readonly;
 
-    DB_PRINT("offset: 0x%04x write: 0x%08x ", offset, (unsigned)val);
+    DB_PRINT("offset: 0x%04x write: 0x%08x ", (unsigned)offset, (unsigned)val);
     offset >>= 2;
 
     /* Squash bits which are read only in write value */
commit c69544130f50536c61686a9ec6734327b6174ee0
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Sat Jan 26 12:54:33 2013 -0800

    cadence_ttc: Debug mode compile fixes
    
    Some printfs are throwing warnings when debug mode is enabled. Fixed.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/cadence_ttc.c b/hw/cadence_ttc.c
index 2a8fadd..67028a3 100644
--- a/hw/cadence_ttc.c
+++ b/hw/cadence_ttc.c
@@ -302,7 +302,7 @@ static uint64_t cadence_ttc_read(void *opaque, hwaddr offset,
 {
     uint32_t ret = cadence_ttc_read_imp(opaque, offset);
 
-    DB_PRINT("addr: %08x data: %08x\n", offset, ret);
+    DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)ret);
     return ret;
 }
 
@@ -311,7 +311,7 @@ static void cadence_ttc_write(void *opaque, hwaddr offset,
 {
     CadenceTimerState *s = cadence_timer_from_addr(opaque, offset);
 
-    DB_PRINT("addr: %08x data %08x\n", offset, (unsigned)value);
+    DB_PRINT("addr: %08x data %08x\n", (unsigned)offset, (unsigned)value);
 
     cadence_timer_sync(s);
 
commit cfba8e6f92d45a2374622c3dc57499e42a1c07e1
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jan 25 10:31:16 2013 +0100

    vnc: Clean up vncws_send_handshake_response()
    
    Use appropriate types, drop superfluous casts, use sizeof, don't
    exploit that this particular call of gnutls_fingerprint() doesn't
    change its last argument.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/ui/vnc-ws.c b/ui/vnc-ws.c
index 9ccdc19..3e30209 100644
--- a/ui/vnc-ws.c
+++ b/ui/vnc-ws.c
@@ -120,10 +120,11 @@ static char *vncws_extract_handshake_entry(const char *handshake,
 static void vncws_send_handshake_response(VncState *vs, const char* key)
 {
     char combined_key[WS_CLIENT_KEY_LEN + WS_GUID_LEN + 1];
-    char hash[SHA1_DIGEST_LEN];
-    size_t hash_size = SHA1_DIGEST_LEN;
+    unsigned char hash[SHA1_DIGEST_LEN];
+    size_t hash_size = sizeof(hash);
     char *accept = NULL, *response = NULL;
     gnutls_datum_t in;
+    int ret;
 
     g_strlcpy(combined_key, key, WS_CLIENT_KEY_LEN + 1);
     g_strlcat(combined_key, WS_GUID, WS_CLIENT_KEY_LEN + WS_GUID_LEN + 1);
@@ -131,9 +132,9 @@ static void vncws_send_handshake_response(VncState *vs, const char* key)
     /* hash and encode it */
     in.data = (void *)combined_key;
     in.size = WS_CLIENT_KEY_LEN + WS_GUID_LEN;
-    if (gnutls_fingerprint(GNUTLS_DIG_SHA1, &in, hash, &hash_size)
-            == GNUTLS_E_SUCCESS) {
-        accept = g_base64_encode((guchar *)hash, SHA1_DIGEST_LEN);
+    ret = gnutls_fingerprint(GNUTLS_DIG_SHA1, &in, hash, &hash_size);
+    if (ret == GNUTLS_E_SUCCESS && hash_size <= SHA1_DIGEST_LEN) {
+        accept = g_base64_encode(hash, hash_size);
     }
     if (accept == NULL) {
         VNC_DEBUG("Hashing Websocket combined key failed\n");
commit 0893d46014b0300fb8aec92df94effea34d04b61
Merge: 0c77067 f8bb056
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Tue Jan 29 16:57:41 2013 -0600

    Merge remote-tracking branch 'qemu-kvm/uq/master' into staging
    
    * qemu-kvm/uq/master:
      target-i386: kvm: prevent buffer overflow if -cpu foo, [x]level is too big
      vmxcap: bit 9 of VMX_PROCBASED_CTLS2 is 'virtual interrupt delivery'
    
    Conflicts:
    	target-i386/kvm.c
    
    Trivial merge resolution due to lack of context.
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --cc target-i386/kvm.c
index c440809,4ecb728..9ebf181
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@@ -411,12 -411,7 +411,14 @@@ static void cpu_update_state(void *opaq
      }
  }
  
 +unsigned long kvm_arch_vcpu_id(CPUState *cs)
 +{
 +    X86CPU *cpu = X86_CPU(cs);
 +    return cpu->env.cpuid_apic_id;
 +}
 +
+ #define KVM_MAX_CPUID_ENTRIES  100
++
  int kvm_arch_init_vcpu(CPUState *cs)
  {
      struct {
commit 0c77067950e0b158ea7d825cfa7630bfd09b968a
Merge: ec9466f 10a2158
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Tue Jan 29 16:57:04 2013 -0600

    Merge remote-tracking branch 'mdroth/qga-pull-1-28-13' into staging
    
    # By Markus Armbruster
    # Via Michael Roth
    * mdroth/qga-pull-1-28-13:
      qemu-ga: Plug leaks on qmp_guest_network_get_interfaces() error paths
      qemu-ga: Plug memory leak in guest_fsfreeze_cleanup()

commit 19380b1bf587fd962a60fb40cc4927ba999cf17b
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Tue Jan 29 16:33:04 2013 +0100

    s390: Drop set_bit usage in virtio_ccw.
    
    set_bit on indicators doesn't go well on 32 bit targets:
    
    note: expected 'long unsigned int *' but argument is of type 'uint64_t *'
    
    Switch to bit shifts instead.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    [agraf: use 1ULL instead]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 7d7f336..231f81e 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -662,12 +662,12 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
 
     if (vector < VIRTIO_PCI_QUEUE_MAX) {
         indicators = ldq_phys(dev->indicators);
-        set_bit(vector, &indicators);
+        indicators |= 1ULL << vector;
         stq_phys(dev->indicators, indicators);
     } else {
         vector = 0;
         indicators = ldq_phys(dev->indicators2);
-        set_bit(vector, &indicators);
+        indicators |= 1ULL << vector;
         stq_phys(dev->indicators2, indicators);
     }
 
commit 8d034a6fad4c580be3ed4a15f24e0bf47aa92d15
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Mon Jan 28 17:01:30 2013 +0100

    s390: css error codes.
    
    Changed error codes in the channel subsystem / virtio-ccw code
    (-EOPNOTSUPP -> -ENOSYS, -ERESTART -> -EINPROGRESS).
    
    This should hopefully fix building on mingw32.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Reviewed-by: Stefan Weil <sw at weilnetz.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 84efd4a..3244201 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -223,7 +223,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
     }
 
     if (ccw.flags & CCW_FLAG_SUSPEND) {
-        return -ERESTART;
+        return -EINPROGRESS;
     }
 
     check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
@@ -291,7 +291,7 @@ static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
             /* Handle device specific commands. */
             ret = sch->ccw_cb(sch, ccw);
         } else {
-            ret = -EOPNOTSUPP;
+            ret = -ENOSYS;
         }
         break;
     }
@@ -347,7 +347,7 @@ static void sch_handle_start_func(SubchDev *sch)
                     SCSW_STCTL_STATUS_PEND;
             s->dstat = SCSW_DSTAT_CHANNEL_END | SCSW_DSTAT_DEVICE_END;
             break;
-        case -EOPNOTSUPP:
+        case -ENOSYS:
             /* unsupported command, generate unit check (command reject) */
             s->ctrl &= ~SCSW_ACTL_START_PEND;
             s->dstat = SCSW_DSTAT_UNIT_CHECK;
@@ -372,7 +372,7 @@ static void sch_handle_start_func(SubchDev *sch)
             s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
             s->ctrl |= SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
             break;
-        case -ERESTART:
+        case -EINPROGRESS:
             /* channel program has been suspended */
             s->ctrl &= ~SCSW_ACTL_START_PEND;
             s->ctrl |= SCSW_ACTL_SUSP;
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 8c9b745..7d7f336 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -384,7 +384,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         }
         break;
     default:
-        ret = -EOPNOTSUPP;
+        ret = -ENOSYS;
         break;
     }
     return ret;
commit 50c8d9bfc6d9ca48b978c6201f97bd850bd0dc83
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Sun Jan 27 23:59:26 2013 +0000

    s390: Use s390_cpu_physical_memory_map for tpi.
    
    Map the I/O interruption code before calling into css.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 113ac9a..84efd4a 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -852,7 +852,7 @@ int css_do_stcrw(CRW *crw)
     return ret;
 }
 
-int css_do_tpi(uint64_t addr, int lowcore)
+int css_do_tpi(IOIntCode *int_code, int lowcore)
 {
     /* No pending interrupts for !KVM. */
     return 0;
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index ce12fa4..9be4a47 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -413,7 +413,7 @@ int css_do_hsch(SubchDev *sch);
 int css_do_ssch(SubchDev *sch, ORB *orb);
 int css_do_tsch(SubchDev *sch, IRB *irb);
 int css_do_stcrw(CRW *crw);
-int css_do_tpi(uint64_t addr, int lowcore);
+int css_do_tpi(IOIntCode *int_code, int lowcore);
 int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
                          int rfmt, void *buf);
 void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo);
@@ -471,7 +471,7 @@ static inline int css_do_stcrw(CRW *crw)
 {
     return 1;
 }
-static inline int css_do_tpi(uint64_t addr, int lowcore)
+static inline int css_do_tpi(IOIntCode *int_code, int lowcore)
 {
     return 0;
 }
diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c
index 4ef2d73..e3531f3 100644
--- a/target-s390x/ioinst.c
+++ b/target-s390x/ioinst.c
@@ -619,16 +619,25 @@ int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb)
 {
     uint64_t addr;
     int lowcore;
+    IOIntCode *int_code;
+    hwaddr len, orig_len;
+    int ret;
 
     trace_ioinst("tpi");
     addr = decode_basedisp_s(env, ipb);
     lowcore = addr ? 0 : 1;
-    if (addr < 8192) {
-        addr += env->psa;
-    } else if ((env->psa <= addr) && (addr < env->psa + 8192)) {
-        addr -= env->psa;
+    len = lowcore ? 8 /* two words */ : 12 /* three words */;
+    orig_len = len;
+    int_code = s390_cpu_physical_memory_map(env, addr, &len, 1);
+    if (!int_code || (len != orig_len)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        ret = -EIO;
+        goto out;
     }
-    return css_do_tpi(addr, lowcore);
+    ret = css_do_tpi(int_code, lowcore);
+out:
+    s390_cpu_physical_memory_unmap(env, int_code, len, 1);
+    return ret;
 }
 
 #define SCHM_REG1_RES(_reg) (_reg & 0x000000000ffffffc)
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
index a59742c..d5a43f4 100644
--- a/target-s390x/ioinst.h
+++ b/target-s390x/ioinst.h
@@ -195,6 +195,13 @@ typedef struct CRW {
 #define CRW_RSC_SUBCH 0x3
 #define CRW_RSC_CHP   0x4
 
+/* I/O interruption code */
+typedef struct IOIntCode {
+    uint32_t subsys_id;
+    uint32_t intparm;
+    uint32_t interrupt_id;
+} QEMU_PACKED IOIntCode;
+
 /* schid disintegration */
 #define IOINST_SCHID_ONE(_schid)   ((_schid & 0x00010000) >> 16)
 #define IOINST_SCHID_M(_schid)     ((_schid & 0x00080000) >> 19)
commit 2e788490d83a4de5bea00207c8695edd5d487e4a
Author: Christian Borntraeger <borntraeger at de.ibm.com>
Date:   Fri Jan 25 06:00:19 2013 +0000

    sclpconsole: Don't instantiate sclpconsole with -nodefaults
    
    libvirt specifies nodefaults and creates an sclp console with special
    parameters. Let qemu follow nodefaults and don't create an sclp
    console if nodefaults is specified.
    
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/vl.c b/vl.c
index 8b0961e..910abb6 100644
--- a/vl.c
+++ b/vl.c
@@ -3652,6 +3652,7 @@ int main(int argc, char **argv, char **envp)
                 default_serial = 0;
                 default_parallel = 0;
                 default_virtcon = 0;
+                default_sclp = 0;
                 default_monitor = 0;
                 default_net = 0;
                 default_floppy = 0;
commit a5c95808bac7d995378b3835e10011775c7c2d0b
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 06:08:56 2013 +0000

    s390: Add s390-ccw-virtio machine.
    
    Add a new machine type, s390-ccw-virtio, making use of the
    virtio-ccw transport to present virtio devices as channel
    devices.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index e4ee456..9f2f419 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -5,4 +5,5 @@ obj-y += event-facility.o
 obj-y += sclpquiesce.o sclpconsole.o
 obj-y += ipl.o
 obj-y += css.o
+obj-y += s390-virtio-ccw.o
 obj-y += virtio-ccw.o
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
new file mode 100644
index 0000000..6549211
--- /dev/null
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -0,0 +1,134 @@
+/*
+ * virtio ccw machine
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "hw/boards.h"
+#include "exec/address-spaces.h"
+#include "s390-virtio.h"
+#include "sclp.h"
+#include "ioinst.h"
+#include "css.h"
+#include "virtio-ccw.h"
+
+static int virtio_ccw_hcall_notify(const uint64_t *args)
+{
+    uint64_t subch_id = args[0];
+    uint64_t queue = args[1];
+    SubchDev *sch;
+    int cssid, ssid, schid, m;
+
+    if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) {
+        return -EINVAL;
+    }
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (!sch || !css_subch_visible(sch)) {
+        return -EINVAL;
+    }
+    virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
+    return 0;
+
+}
+
+static int virtio_ccw_hcall_early_printk(const uint64_t *args)
+{
+    uint64_t mem = args[0];
+
+    if (mem < ram_size) {
+        /* Early printk */
+        return 0;
+    }
+    return -EINVAL;
+}
+
+static void virtio_ccw_register_hcalls(void)
+{
+    s390_register_virtio_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY,
+                                   virtio_ccw_hcall_notify);
+    /* Tolerate early printk. */
+    s390_register_virtio_hypercall(KVM_S390_VIRTIO_NOTIFY,
+                                   virtio_ccw_hcall_early_printk);
+}
+
+static void ccw_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t my_ram_size = args->ram_size;
+    MemoryRegion *sysmem = get_system_memory();
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    int shift = 0;
+    uint8_t *storage_keys;
+    int ret;
+    VirtualCssBus *css_bus;
+
+    /* s390x ram size detection needs a 16bit multiplier + an increment. So
+       guests > 64GB can be specified in 2MB steps etc. */
+    while ((my_ram_size >> (20 + shift)) > 65535) {
+        shift++;
+    }
+    my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
+
+    /* lets propagate the changed ram size into the global variable. */
+    ram_size = my_ram_size;
+
+    /* get a BUS */
+    css_bus = virtual_css_bus_init();
+    s390_sclp_init();
+    s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
+                      args->initrd_filename);
+
+    /* register hypercalls */
+    virtio_ccw_register_hcalls();
+
+    /* allocate RAM */
+    memory_region_init_ram(ram, "s390.ram", my_ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(sysmem, 0, ram);
+
+    /* allocate storage keys */
+    storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
+
+    /* init CPUs */
+    s390_init_cpus(args->cpu_model, storage_keys);
+
+    if (kvm_enabled()) {
+        kvm_s390_enable_css_support(s390_cpu_addr2state(0));
+    }
+    /*
+     * Create virtual css and set it as default so that non mcss-e
+     * enabled guests only see virtio devices.
+     */
+    ret = css_create_css_image(VIRTUAL_CSSID, true);
+    assert(ret == 0);
+
+    /* Create VirtIO network adapters */
+    s390_create_virtio_net(BUS(css_bus), "virtio-net-ccw");
+}
+
+static QEMUMachine ccw_machine = {
+    .name = "s390-ccw-virtio",
+    .alias = "s390-ccw",
+    .desc = "VirtIO-ccw based S390 machine",
+    .init = ccw_init,
+    .block_default_type = IF_VIRTIO,
+    .no_cdrom = 1,
+    .no_floppy = 1,
+    .no_serial = 1,
+    .no_parallel = 1,
+    .no_sdcard = 1,
+    .use_sclp = 1,
+    .max_cpus = 255,
+    DEFAULT_MACHINE_OPTIONS,
+};
+
+static void ccw_machine_init(void)
+{
+    qemu_register_machine(&ccw_machine);
+}
+
+machine_init(ccw_machine_init)
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
index 67bfd20..a6c4c19 100644
--- a/hw/s390x/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -15,6 +15,7 @@
 #define KVM_S390_VIRTIO_NOTIFY          0
 #define KVM_S390_VIRTIO_RESET           1
 #define KVM_S390_VIRTIO_SET_STATUS      2
+#define KVM_S390_VIRTIO_CCW_NOTIFY      3
 
 typedef int (*s390_virtio_fn)(const uint64_t *args);
 void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
commit ab290630fad0df42ee94a81b20c48d0da2dc6f66
Author: Andreas Färber <afaerber at suse.de>
Date:   Tue Jan 22 23:24:08 2013 +0000

    s390-virtio: Check for NULL device in reset hypercall
    
    s390_virtio_bus_find_mem() may return a NULL VirtIOS390Device.
    If called with, e.g., args[0] == 0, this leads to a segfault.
    Fix this by adding error handling as done for other hypercalls.
    
    Present since baf0b55a9e57b909b1f8b0f732c0b10242867418 (Implement
    virtio reset).
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index a8a489d..2a1d9ac 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -86,6 +86,9 @@ static int s390_virtio_hcall_reset(const uint64_t *args)
     VirtIOS390Device *dev;
 
     dev = s390_virtio_bus_find_mem(s390_bus, mem);
+    if (dev == NULL) {
+        return -EINVAL;
+    }
     virtio_reset(dev->vdev);
     stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
     s390_virtio_device_sync(dev);
commit b73d35311098585dbdb375fdf8369b16c8222e12
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Jan 25 01:16:39 2013 +0100

    s390: Move hw files to hw/s390x
    
    This moves all files only used by s390 system emulation to hw/s390x.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Acked-by: Christian Borntraeger <borntraeger at de.ibm.com>

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
deleted file mode 100644
index 6858db0..0000000
--- a/hw/s390-virtio-bus.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * QEMU S390 virtio target
- *
- * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "hw.h"
-#include "block/block.h"
-#include "sysemu/sysemu.h"
-#include "boards.h"
-#include "monitor/monitor.h"
-#include "loader.h"
-#include "elf.h"
-#include "hw/virtio.h"
-#include "hw/virtio-rng.h"
-#include "hw/virtio-serial.h"
-#include "hw/virtio-net.h"
-#include "hw/sysbus.h"
-#include "sysemu/kvm.h"
-
-#include "hw/s390-virtio-bus.h"
-#include "hw/virtio-bus.h"
-
-/* #define DEBUG_S390 */
-
-#ifdef DEBUG_S390
-#define dprintf(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define dprintf(fmt, ...) \
-    do { } while (0)
-#endif
-
-#define VIRTIO_EXT_CODE   0x2603
-
-static const TypeInfo s390_virtio_bus_info = {
-    .name = TYPE_S390_VIRTIO_BUS,
-    .parent = TYPE_BUS,
-    .instance_size = sizeof(VirtIOS390Bus),
-};
-
-static const VirtIOBindings virtio_s390_bindings;
-
-static ram_addr_t s390_virtio_device_num_vq(VirtIOS390Device *dev);
-
-/* length of VirtIO device pages */
-const hwaddr virtio_size = S390_DEVICE_PAGES * TARGET_PAGE_SIZE;
-
-static void s390_virtio_bus_reset(void *opaque)
-{
-    VirtIOS390Bus *bus = opaque;
-    bus->next_ring = bus->dev_page + TARGET_PAGE_SIZE;
-}
-
-void s390_virtio_reset_idx(VirtIOS390Device *dev)
-{
-    int i;
-    hwaddr idx_addr;
-    uint8_t num_vq;
-
-    num_vq = s390_virtio_device_num_vq(dev);
-    for (i = 0; i < num_vq; i++) {
-        idx_addr = virtio_queue_get_avail_addr(dev->vdev, i) +
-            VIRTIO_VRING_AVAIL_IDX_OFFS;
-        stw_phys(idx_addr, 0);
-        idx_addr = virtio_queue_get_used_addr(dev->vdev, i) +
-            VIRTIO_VRING_USED_IDX_OFFS;
-        stw_phys(idx_addr, 0);
-    }
-}
-
-VirtIOS390Bus *s390_virtio_bus_init(ram_addr_t *ram_size)
-{
-    VirtIOS390Bus *bus;
-    BusState *_bus;
-    DeviceState *dev;
-
-    /* Create bridge device */
-    dev = qdev_create(NULL, "s390-virtio-bridge");
-    qdev_init_nofail(dev);
-
-    /* Create bus on bridge device */
-
-    _bus = qbus_create(TYPE_S390_VIRTIO_BUS, dev, "s390-virtio");
-    bus = DO_UPCAST(VirtIOS390Bus, bus, _bus);
-
-    bus->dev_page = *ram_size;
-    bus->dev_offs = bus->dev_page;
-    bus->next_ring = bus->dev_page + TARGET_PAGE_SIZE;
-
-    /* Enable hotplugging */
-    _bus->allow_hotplug = 1;
-
-    /* Allocate RAM for VirtIO device pages (descriptors, queues, rings) */
-    *ram_size += S390_DEVICE_PAGES * TARGET_PAGE_SIZE;
-
-    qemu_register_reset(s390_virtio_bus_reset, bus);
-    return bus;
-}
-
-static void s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token)
-{
-    CPUS390XState *env = &cpu->env;
-
-    if (kvm_enabled()) {
-        kvm_s390_virtio_irq(cpu, config_change, token);
-    } else {
-        cpu_inject_ext(env, VIRTIO_EXT_CODE, config_change, token);
-    }
-}
-
-static int s390_virtio_device_init(VirtIOS390Device *dev, VirtIODevice *vdev)
-{
-    VirtIOS390Bus *bus;
-    int dev_len;
-
-    bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
-    dev->vdev = vdev;
-    dev->dev_offs = bus->dev_offs;
-    dev->feat_len = sizeof(uint32_t); /* always keep 32 bits features */
-
-    dev_len = VIRTIO_DEV_OFFS_CONFIG;
-    dev_len += s390_virtio_device_num_vq(dev) * VIRTIO_VQCONFIG_LEN;
-    dev_len += dev->feat_len * 2;
-    dev_len += vdev->config_len;
-
-    bus->dev_offs += dev_len;
-
-    virtio_bind_device(vdev, &virtio_s390_bindings, DEVICE(dev));
-    dev->host_features = vdev->get_features(vdev, dev->host_features);
-    s390_virtio_device_sync(dev);
-    s390_virtio_reset_idx(dev);
-    if (dev->qdev.hotplugged) {
-        S390CPU *cpu = s390_cpu_addr2state(0);
-        s390_virtio_irq(cpu, VIRTIO_PARAM_DEV_ADD, dev->dev_offs);
-    }
-
-    return 0;
-}
-
-static int s390_virtio_net_init(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev;
-
-    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net);
-    if (!vdev) {
-        return -1;
-    }
-
-    return s390_virtio_device_init(dev, vdev);
-}
-
-static int s390_virtio_blk_init(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev;
-
-    vdev = virtio_blk_init((DeviceState *)dev, &dev->blk);
-    if (!vdev) {
-        return -1;
-    }
-
-    return s390_virtio_device_init(dev, vdev);
-}
-
-static int s390_virtio_serial_init(VirtIOS390Device *dev)
-{
-    VirtIOS390Bus *bus;
-    VirtIODevice *vdev;
-    int r;
-
-    bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
-
-    vdev = virtio_serial_init((DeviceState *)dev, &dev->serial);
-    if (!vdev) {
-        return -1;
-    }
-
-    r = s390_virtio_device_init(dev, vdev);
-    if (!r) {
-        bus->console = dev;
-    }
-
-    return r;
-}
-
-static int s390_virtio_scsi_init(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev;
-
-    vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi);
-    if (!vdev) {
-        return -1;
-    }
-
-    return s390_virtio_device_init(dev, vdev);
-}
-
-static int s390_virtio_rng_init(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev;
-
-    vdev = virtio_rng_init((DeviceState *)dev, &dev->rng);
-    if (!vdev) {
-        return -1;
-    }
-
-    return s390_virtio_device_init(dev, vdev);
-}
-
-static uint64_t s390_virtio_device_vq_token(VirtIOS390Device *dev, int vq)
-{
-    ram_addr_t token_off;
-
-    token_off = (dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG) +
-                (vq * VIRTIO_VQCONFIG_LEN) +
-                VIRTIO_VQCONFIG_OFFS_TOKEN;
-
-    return ldq_be_phys(token_off);
-}
-
-static ram_addr_t s390_virtio_device_num_vq(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev = dev->vdev;
-    int num_vq;
-
-    for (num_vq = 0; num_vq < VIRTIO_PCI_QUEUE_MAX; num_vq++) {
-        if (!virtio_queue_get_num(vdev, num_vq)) {
-            break;
-        }
-    }
-
-    return num_vq;
-}
-
-static ram_addr_t s390_virtio_next_ring(VirtIOS390Bus *bus)
-{
-    ram_addr_t r = bus->next_ring;
-
-    bus->next_ring += VIRTIO_RING_LEN;
-    return r;
-}
-
-void s390_virtio_device_sync(VirtIOS390Device *dev)
-{
-    VirtIOS390Bus *bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
-    ram_addr_t cur_offs;
-    uint8_t num_vq;
-    int i;
-
-    virtio_reset(dev->vdev);
-
-    /* Sync dev space */
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_TYPE, dev->vdev->device_id);
-
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, s390_virtio_device_num_vq(dev));
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_FEATURE_LEN, dev->feat_len);
-
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG_LEN, dev->vdev->config_len);
-
-    num_vq = s390_virtio_device_num_vq(dev);
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, num_vq);
-
-    /* Sync virtqueues */
-    for (i = 0; i < num_vq; i++) {
-        ram_addr_t vq = (dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG) +
-                        (i * VIRTIO_VQCONFIG_LEN);
-        ram_addr_t vring;
-
-        vring = s390_virtio_next_ring(bus);
-        virtio_queue_set_addr(dev->vdev, i, vring);
-        virtio_queue_set_vector(dev->vdev, i, i);
-        stq_be_phys(vq + VIRTIO_VQCONFIG_OFFS_ADDRESS, vring);
-        stw_be_phys(vq + VIRTIO_VQCONFIG_OFFS_NUM, virtio_queue_get_num(dev->vdev, i));
-    }
-
-    cur_offs = dev->dev_offs;
-    cur_offs += VIRTIO_DEV_OFFS_CONFIG;
-    cur_offs += num_vq * VIRTIO_VQCONFIG_LEN;
-
-    /* Sync feature bitmap */
-    stl_le_phys(cur_offs, dev->host_features);
-
-    dev->feat_offs = cur_offs + dev->feat_len;
-    cur_offs += dev->feat_len * 2;
-
-    /* Sync config space */
-    if (dev->vdev->get_config) {
-        dev->vdev->get_config(dev->vdev, dev->vdev->config);
-    }
-
-    cpu_physical_memory_write(cur_offs,
-                              dev->vdev->config, dev->vdev->config_len);
-    cur_offs += dev->vdev->config_len;
-}
-
-void s390_virtio_device_update_status(VirtIOS390Device *dev)
-{
-    VirtIODevice *vdev = dev->vdev;
-    uint32_t features;
-
-    virtio_set_status(vdev, ldub_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS));
-
-    /* Update guest supported feature bitmap */
-
-    features = bswap32(ldl_be_phys(dev->feat_offs));
-    virtio_set_features(vdev, features);
-}
-
-VirtIOS390Device *s390_virtio_bus_console(VirtIOS390Bus *bus)
-{
-    return bus->console;
-}
-
-/* Find a device by vring address */
-VirtIOS390Device *s390_virtio_bus_find_vring(VirtIOS390Bus *bus,
-                                             ram_addr_t mem,
-                                             int *vq_num)
-{
-    BusChild *kid;
-    int i;
-
-    QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
-        VirtIOS390Device *dev = (VirtIOS390Device *)kid->child;
-
-        for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
-            if (!virtio_queue_get_addr(dev->vdev, i))
-                break;
-            if (virtio_queue_get_addr(dev->vdev, i) == mem) {
-                if (vq_num) {
-                    *vq_num = i;
-                }
-                return dev;
-            }
-        }
-    }
-
-    return NULL;
-}
-
-/* Find a device by device descriptor location */
-VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem)
-{
-    BusChild *kid;
-
-    QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
-        VirtIOS390Device *dev = (VirtIOS390Device *)kid->child;
-        if (dev->dev_offs == mem) {
-            return dev;
-        }
-    }
-
-    return NULL;
-}
-
-/* DeviceState to VirtIOS390Device. Note: used on datapath,
- * be careful and test performance if you change this.
- */
-static inline VirtIOS390Device *to_virtio_s390_device_fast(DeviceState *d)
-{
-    return container_of(d, VirtIOS390Device, qdev);
-}
-
-/* DeviceState to VirtIOS390Device. TODO: use QOM. */
-static inline VirtIOS390Device *to_virtio_s390_device(DeviceState *d)
-{
-    return container_of(d, VirtIOS390Device, qdev);
-}
-
-static void virtio_s390_notify(DeviceState *d, uint16_t vector)
-{
-    VirtIOS390Device *dev = to_virtio_s390_device_fast(d);
-    uint64_t token = s390_virtio_device_vq_token(dev, vector);
-    S390CPU *cpu = s390_cpu_addr2state(0);
-
-    s390_virtio_irq(cpu, 0, token);
-}
-
-static unsigned virtio_s390_get_features(DeviceState *d)
-{
-    VirtIOS390Device *dev = to_virtio_s390_device(d);
-    return dev->host_features;
-}
-
-/**************** S390 Virtio Bus Device Descriptions *******************/
-
-static const VirtIOBindings virtio_s390_bindings = {
-    .notify = virtio_s390_notify,
-    .get_features = virtio_s390_get_features,
-};
-
-static Property s390_virtio_net_properties[] = {
-    DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
-    DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device,
-                       net.txtimer, TX_TIMER_INTERVAL),
-    DEFINE_PROP_INT32("x-txburst", VirtIOS390Device,
-                      net.txburst, TX_BURST),
-    DEFINE_PROP_STRING("tx", VirtIOS390Device, net.tx),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void s390_virtio_net_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_net_init;
-    dc->props = s390_virtio_net_properties;
-}
-
-static const TypeInfo s390_virtio_net = {
-    .name          = "virtio-net-s390",
-    .parent        = TYPE_VIRTIO_S390_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .class_init    = s390_virtio_net_class_init,
-};
-
-static Property s390_virtio_blk_properties[] = {
-    DEFINE_BLOCK_PROPERTIES(VirtIOS390Device, blk.conf),
-    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOS390Device, blk.conf),
-    DEFINE_PROP_STRING("serial", VirtIOS390Device, blk.serial),
-#ifdef __linux__
-    DEFINE_PROP_BIT("scsi", VirtIOS390Device, blk.scsi, 0, true),
-#endif
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void s390_virtio_blk_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_blk_init;
-    dc->props = s390_virtio_blk_properties;
-}
-
-static const TypeInfo s390_virtio_blk = {
-    .name          = "virtio-blk-s390",
-    .parent        = TYPE_VIRTIO_S390_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .class_init    = s390_virtio_blk_class_init,
-};
-
-static Property s390_virtio_serial_properties[] = {
-    DEFINE_PROP_UINT32("max_ports", VirtIOS390Device,
-                       serial.max_virtserial_ports, 31),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void s390_virtio_serial_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_serial_init;
-    dc->props = s390_virtio_serial_properties;
-}
-
-static const TypeInfo s390_virtio_serial = {
-    .name          = "virtio-serial-s390",
-    .parent        = TYPE_VIRTIO_S390_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .class_init    = s390_virtio_serial_class_init,
-};
-
-static void s390_virtio_rng_initfn(Object *obj)
-{
-    VirtIOS390Device *dev = VIRTIO_S390_DEVICE(obj);
-
-    object_property_add_link(obj, "rng", TYPE_RNG_BACKEND,
-                             (Object **)&dev->rng.rng, NULL);
-}
-
-static void s390_virtio_rng_class_init(ObjectClass *klass, void *data)
-{
-    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_rng_init;
-}
-
-static const TypeInfo s390_virtio_rng = {
-    .name          = "virtio-rng-s390",
-    .parent        = TYPE_VIRTIO_S390_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .instance_init = s390_virtio_rng_initfn,
-    .class_init    = s390_virtio_rng_class_init,
-};
-
-static int s390_virtio_busdev_init(DeviceState *dev)
-{
-    VirtIOS390Device *_dev = (VirtIOS390Device *)dev;
-    VirtIOS390DeviceClass *_info = VIRTIO_S390_DEVICE_GET_CLASS(dev);
-
-    virtio_s390_bus_new(&_dev->bus, _dev);
-
-    return _info->init(_dev);
-}
-
-static void s390_virtio_busdev_reset(DeviceState *dev)
-{
-    VirtIOS390Device *_dev = (VirtIOS390Device *)dev;
-
-    virtio_reset(_dev->vdev);
-}
-
-static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-
-    dc->init = s390_virtio_busdev_init;
-    dc->bus_type = TYPE_S390_VIRTIO_BUS;
-    dc->unplug = qdev_simple_unplug_cb;
-    dc->reset = s390_virtio_busdev_reset;
-}
-
-static const TypeInfo virtio_s390_device_info = {
-    .name = TYPE_VIRTIO_S390_DEVICE,
-    .parent = TYPE_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .class_init = virtio_s390_device_class_init,
-    .class_size = sizeof(VirtIOS390DeviceClass),
-    .abstract = true,
-};
-
-static Property s390_virtio_scsi_properties[] = {
-    DEFINE_VIRTIO_SCSI_PROPERTIES(VirtIOS390Device, host_features, scsi),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void s390_virtio_scsi_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_scsi_init;
-    dc->props = s390_virtio_scsi_properties;
-}
-
-static const TypeInfo s390_virtio_scsi = {
-    .name          = "virtio-scsi-s390",
-    .parent        = TYPE_VIRTIO_S390_DEVICE,
-    .instance_size = sizeof(VirtIOS390Device),
-    .class_init    = s390_virtio_scsi_class_init,
-};
-
-/***************** S390 Virtio Bus Bridge Device *******************/
-/* Only required to have the virtio bus as child in the system bus */
-
-static int s390_virtio_bridge_init(SysBusDevice *dev)
-{
-    /* nothing */
-    return 0;
-}
-
-static void s390_virtio_bridge_class_init(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
-
-    k->init = s390_virtio_bridge_init;
-    dc->no_user = 1;
-}
-
-static const TypeInfo s390_virtio_bridge_info = {
-    .name          = "s390-virtio-bridge",
-    .parent        = TYPE_SYS_BUS_DEVICE,
-    .instance_size = sizeof(SysBusDevice),
-    .class_init    = s390_virtio_bridge_class_init,
-};
-
-/* virtio-s390-bus */
-
-void virtio_s390_bus_new(VirtioBusState *bus, VirtIOS390Device *dev)
-{
-    DeviceState *qdev = DEVICE(dev);
-    BusState *qbus;
-    qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_S390_BUS, qdev, NULL);
-    qbus = BUS(bus);
-    qbus->allow_hotplug = 0;
-}
-
-static void virtio_s390_bus_class_init(ObjectClass *klass, void *data)
-{
-    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
-    BusClass *bus_class = BUS_CLASS(klass);
-    bus_class->max_dev = 1;
-    k->notify = virtio_s390_notify;
-    k->get_features = virtio_s390_get_features;
-}
-
-static const TypeInfo virtio_s390_bus_info = {
-    .name          = TYPE_VIRTIO_S390_BUS,
-    .parent        = TYPE_VIRTIO_BUS,
-    .instance_size = sizeof(VirtioS390BusState),
-    .class_init    = virtio_s390_bus_class_init,
-};
-
-static void s390_virtio_register_types(void)
-{
-    type_register_static(&virtio_s390_bus_info);
-    type_register_static(&s390_virtio_bus_info);
-    type_register_static(&virtio_s390_device_info);
-    type_register_static(&s390_virtio_serial);
-    type_register_static(&s390_virtio_blk);
-    type_register_static(&s390_virtio_net);
-    type_register_static(&s390_virtio_scsi);
-    type_register_static(&s390_virtio_rng);
-    type_register_static(&s390_virtio_bridge_info);
-}
-
-type_init(s390_virtio_register_types)
diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h
deleted file mode 100644
index 438b37f..0000000
--- a/hw/s390-virtio-bus.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * QEMU S390x VirtIO BUS definitions
- *
- * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef HW_S390_VIRTIO_BUS_H
-#define HW_S390_VIRTIO_BUS_H 1
-
-#include "virtio-blk.h"
-#include "virtio-net.h"
-#include "virtio-rng.h"
-#include "virtio-serial.h"
-#include "virtio-scsi.h"
-#include "virtio-bus.h"
-
-#define VIRTIO_DEV_OFFS_TYPE		0	/* 8 bits */
-#define VIRTIO_DEV_OFFS_NUM_VQ		1	/* 8 bits */
-#define VIRTIO_DEV_OFFS_FEATURE_LEN	2	/* 8 bits */
-#define VIRTIO_DEV_OFFS_CONFIG_LEN	3	/* 8 bits */
-#define VIRTIO_DEV_OFFS_STATUS		4	/* 8 bits */
-#define VIRTIO_DEV_OFFS_CONFIG		5	/* dynamic */
-
-#define VIRTIO_VQCONFIG_OFFS_TOKEN	0	/* 64 bits */
-#define VIRTIO_VQCONFIG_OFFS_ADDRESS	8	/* 64 bits */
-#define VIRTIO_VQCONFIG_OFFS_NUM	16	/* 16 bits */
-#define VIRTIO_VQCONFIG_LEN		24
-
-#define VIRTIO_RING_LEN			(TARGET_PAGE_SIZE * 3)
-#define VIRTIO_VRING_AVAIL_IDX_OFFS 2
-#define VIRTIO_VRING_USED_IDX_OFFS 2
-#define S390_DEVICE_PAGES		512
-
-#define VIRTIO_PARAM_MASK               0xff
-#define VIRTIO_PARAM_VRING_INTERRUPT    0x0
-#define VIRTIO_PARAM_CONFIG_CHANGED     0x1
-#define VIRTIO_PARAM_DEV_ADD            0x2
-
-#define TYPE_VIRTIO_S390_DEVICE "virtio-s390-device"
-#define VIRTIO_S390_DEVICE(obj) \
-     OBJECT_CHECK(VirtIOS390Device, (obj), TYPE_VIRTIO_S390_DEVICE)
-#define VIRTIO_S390_DEVICE_CLASS(klass) \
-     OBJECT_CLASS_CHECK(VirtIOS390DeviceClass, (klass), TYPE_VIRTIO_S390_DEVICE)
-#define VIRTIO_S390_DEVICE_GET_CLASS(obj) \
-     OBJECT_GET_CLASS(VirtIOS390DeviceClass, (obj), TYPE_VIRTIO_S390_DEVICE)
-
-#define TYPE_S390_VIRTIO_BUS "s390-virtio-bus"
-#define S390_VIRTIO_BUS(obj) \
-     OBJECT_CHECK(VirtIOS390Bus, (obj), TYPE_S390_VIRTIO_BUS)
-
-/* virtio-s390-bus */
-
-typedef struct VirtioBusState VirtioS390BusState;
-typedef struct VirtioBusClass VirtioS390BusClass;
-
-#define TYPE_VIRTIO_S390_BUS "virtio-s390-bus"
-#define VIRTIO_S390_BUS(obj) \
-        OBJECT_CHECK(VirtioS390BusState, (obj), TYPE_VIRTIO_S390_BUS)
-#define VIRTIO_S390_BUS_GET_CLASS(obj) \
-        OBJECT_GET_CLASS(VirtioS390BusClass, obj, TYPE_VIRTIO_S390_BUS)
-#define VIRTIO_S390_BUS_CLASS(klass) \
-        OBJECT_CLASS_CHECK(VirtioS390BusClass, klass, TYPE_VIRTIO_S390_BUS)
-
-
-typedef struct VirtIOS390Device VirtIOS390Device;
-
-void virtio_s390_bus_new(VirtioBusState *bus, VirtIOS390Device *dev);
-
-typedef struct VirtIOS390DeviceClass {
-    DeviceClass qdev;
-    int (*init)(VirtIOS390Device *dev);
-} VirtIOS390DeviceClass;
-
-struct VirtIOS390Device {
-    DeviceState qdev;
-    ram_addr_t dev_offs;
-    ram_addr_t feat_offs;
-    uint8_t feat_len;
-    VirtIODevice *vdev;
-    VirtIOBlkConf blk;
-    NICConf nic;
-    uint32_t host_features;
-    virtio_serial_conf serial;
-    virtio_net_conf net;
-    VirtIOSCSIConf scsi;
-    VirtIORNGConf rng;
-    VirtioBusState bus;
-};
-
-typedef struct VirtIOS390Bus {
-    BusState bus;
-
-    VirtIOS390Device *console;
-    ram_addr_t dev_page;
-    ram_addr_t dev_offs;
-    ram_addr_t next_ring;
-} VirtIOS390Bus;
-
-
-void s390_virtio_device_update_status(VirtIOS390Device *dev);
-
-VirtIOS390Device *s390_virtio_bus_console(VirtIOS390Bus *bus);
-VirtIOS390Bus *s390_virtio_bus_init(ram_addr_t *ram_size);
-
-VirtIOS390Device *s390_virtio_bus_find_vring(VirtIOS390Bus *bus,
-                                             ram_addr_t mem, int *vq_num);
-VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem);
-void s390_virtio_device_sync(VirtIOS390Device *dev);
-void s390_virtio_reset_idx(VirtIOS390Device *dev);
-
-
-#endif
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
deleted file mode 100644
index 6e0f53b..0000000
--- a/hw/s390-virtio.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * QEMU S390 virtio target
- *
- * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
- * Copyright IBM Corp 2012
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * Contributions after 2012-10-29 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- *
- * You should have received a copy of the GNU (Lesser) General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "hw.h"
-#include "block/block.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/sysemu.h"
-#include "net/net.h"
-#include "boards.h"
-#include "monitor/monitor.h"
-#include "loader.h"
-#include "hw/virtio.h"
-#include "hw/sysbus.h"
-#include "sysemu/kvm.h"
-#include "exec/address-spaces.h"
-
-#include "hw/s390-virtio-bus.h"
-#include "hw/s390x/sclp.h"
-#include "hw/s390-virtio.h"
-
-//#define DEBUG_S390
-
-#ifdef DEBUG_S390
-#define dprintf(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define dprintf(fmt, ...) \
-    do { } while (0)
-#endif
-
-#define MAX_BLK_DEVS                    10
-
-static VirtIOS390Bus *s390_bus;
-static S390CPU **ipi_states;
-
-S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
-{
-    if (cpu_addr >= smp_cpus) {
-        return NULL;
-    }
-
-    return ipi_states[cpu_addr];
-}
-
-static int s390_virtio_hcall_notify(const uint64_t *args)
-{
-    uint64_t mem = args[0];
-    int r = 0, i;
-
-    if (mem > ram_size) {
-        VirtIOS390Device *dev = s390_virtio_bus_find_vring(s390_bus, mem, &i);
-        if (dev) {
-            virtio_queue_notify(dev->vdev, i);
-        } else {
-            r = -EINVAL;
-        }
-    } else {
-        /* Early printk */
-    }
-    return r;
-}
-
-static int s390_virtio_hcall_reset(const uint64_t *args)
-{
-    uint64_t mem = args[0];
-    VirtIOS390Device *dev;
-
-    dev = s390_virtio_bus_find_mem(s390_bus, mem);
-    virtio_reset(dev->vdev);
-    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
-    s390_virtio_device_sync(dev);
-    s390_virtio_reset_idx(dev);
-
-    return 0;
-}
-
-static int s390_virtio_hcall_set_status(const uint64_t *args)
-{
-    uint64_t mem = args[0];
-    int r = 0;
-    VirtIOS390Device *dev;
-
-    dev = s390_virtio_bus_find_mem(s390_bus, mem);
-    if (dev) {
-        s390_virtio_device_update_status(dev);
-    } else {
-        r = -EINVAL;
-    }
-    return r;
-}
-
-static void s390_virtio_register_hcalls(void)
-{
-    s390_register_virtio_hypercall(KVM_S390_VIRTIO_NOTIFY,
-                                   s390_virtio_hcall_notify);
-    s390_register_virtio_hypercall(KVM_S390_VIRTIO_RESET,
-                                   s390_virtio_hcall_reset);
-    s390_register_virtio_hypercall(KVM_S390_VIRTIO_SET_STATUS,
-                                   s390_virtio_hcall_set_status);
-}
-
-/*
- * The number of running CPUs. On s390 a shutdown is the state of all CPUs
- * being either stopped or disabled (for interrupts) waiting. We have to
- * track this number to call the shutdown sequence accordingly. This
- * number is modified either on startup or while holding the big qemu lock.
- */
-static unsigned s390_running_cpus;
-
-void s390_add_running_cpu(CPUS390XState *env)
-{
-    if (env->halted) {
-        s390_running_cpus++;
-        env->halted = 0;
-        env->exception_index = -1;
-    }
-}
-
-unsigned s390_del_running_cpu(CPUS390XState *env)
-{
-    if (env->halted == 0) {
-        assert(s390_running_cpus >= 1);
-        s390_running_cpus--;
-        env->halted = 1;
-        env->exception_index = EXCP_HLT;
-    }
-    return s390_running_cpus;
-}
-
-void s390_init_ipl_dev(const char *kernel_filename,
-                       const char *kernel_cmdline,
-                       const char *initrd_filename)
-{
-    DeviceState *dev;
-
-    dev  = qdev_create(NULL, "s390-ipl");
-    if (kernel_filename) {
-        qdev_prop_set_string(dev, "kernel", kernel_filename);
-    }
-    if (initrd_filename) {
-        qdev_prop_set_string(dev, "initrd", initrd_filename);
-    }
-    qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
-    qdev_init_nofail(dev);
-}
-
-void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys)
-{
-    int i;
-
-    if (cpu_model == NULL) {
-        cpu_model = "host";
-    }
-
-    ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
-
-    for (i = 0; i < smp_cpus; i++) {
-        S390CPU *cpu;
-
-        cpu = cpu_s390x_init(cpu_model);
-
-        ipi_states[i] = cpu;
-        cpu->env.halted = 1;
-        cpu->env.exception_index = EXCP_HLT;
-        cpu->env.storage_keys = storage_keys;
-    }
-}
-
-
-void s390_create_virtio_net(BusState *bus, const char *name)
-{
-    int i;
-
-    for (i = 0; i < nb_nics; i++) {
-        NICInfo *nd = &nd_table[i];
-        DeviceState *dev;
-
-        if (!nd->model) {
-            nd->model = g_strdup("virtio");
-        }
-
-        if (strcmp(nd->model, "virtio")) {
-            fprintf(stderr, "S390 only supports VirtIO nics\n");
-            exit(1);
-        }
-
-        dev = qdev_create(bus, name);
-        qdev_set_nic_properties(dev, nd);
-        qdev_init_nofail(dev);
-    }
-}
-
-/* PC hardware initialisation */
-static void s390_init(QEMUMachineInitArgs *args)
-{
-    ram_addr_t my_ram_size = args->ram_size;
-    MemoryRegion *sysmem = get_system_memory();
-    MemoryRegion *ram = g_new(MemoryRegion, 1);
-    int shift = 0;
-    uint8_t *storage_keys;
-    void *virtio_region;
-    hwaddr virtio_region_len;
-    hwaddr virtio_region_start;
-
-    /* s390x ram size detection needs a 16bit multiplier + an increment. So
-       guests > 64GB can be specified in 2MB steps etc. */
-    while ((my_ram_size >> (20 + shift)) > 65535) {
-        shift++;
-    }
-    my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
-
-    /* lets propagate the changed ram size into the global variable. */
-    ram_size = my_ram_size;
-
-    /* get a BUS */
-    s390_bus = s390_virtio_bus_init(&my_ram_size);
-    s390_sclp_init();
-    s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
-                      args->initrd_filename);
-
-    /* register hypercalls */
-    s390_virtio_register_hcalls();
-
-    /* allocate RAM */
-    memory_region_init_ram(ram, "s390.ram", my_ram_size);
-    vmstate_register_ram_global(ram);
-    memory_region_add_subregion(sysmem, 0, ram);
-
-    /* clear virtio region */
-    virtio_region_len = my_ram_size - ram_size;
-    virtio_region_start = ram_size;
-    virtio_region = cpu_physical_memory_map(virtio_region_start,
-                                            &virtio_region_len, true);
-    memset(virtio_region, 0, virtio_region_len);
-    cpu_physical_memory_unmap(virtio_region, virtio_region_len, 1,
-                              virtio_region_len);
-
-    /* allocate storage keys */
-    storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
-
-    /* init CPUs */
-    s390_init_cpus(args->cpu_model, storage_keys);
-
-    /* Create VirtIO network adapters */
-    s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390");
-}
-
-static QEMUMachine s390_machine = {
-    .name = "s390-virtio",
-    .alias = "s390",
-    .desc = "VirtIO based S390 machine",
-    .init = s390_init,
-    .block_default_type = IF_VIRTIO,
-    .no_cdrom = 1,
-    .no_floppy = 1,
-    .no_serial = 1,
-    .no_parallel = 1,
-    .no_sdcard = 1,
-    .use_virtcon = 1,
-    .max_cpus = 255,
-    .is_default = 1,
-    DEFAULT_MACHINE_OPTIONS,
-};
-
-static void s390_machine_init(void)
-{
-    qemu_register_machine(&s390_machine);
-}
-
-machine_init(s390_machine_init);
diff --git a/hw/s390-virtio.h b/hw/s390-virtio.h
deleted file mode 100644
index 67bfd20..0000000
--- a/hw/s390-virtio.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Virtio interfaces for s390
- *
- * Copyright 2012 IBM Corp.
- * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or (at
- * your option) any later version. See the COPYING file in the top-level
- * directory.
- */
-
-#ifndef HW_S390_VIRTIO_H
-#define HW_S390_VIRTIO_H 1
-
-#define KVM_S390_VIRTIO_NOTIFY          0
-#define KVM_S390_VIRTIO_RESET           1
-#define KVM_S390_VIRTIO_SET_STATUS      2
-
-typedef int (*s390_virtio_fn)(const uint64_t *args);
-void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
-
-void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys);
-void s390_init_ipl_dev(const char *kernel_filename,
-                       const char *kernel_cmdline,
-                       const char *initrd_filename);
-void s390_create_virtio_net(BusState *bus, const char *name);
-#endif
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index f6b461b..e4ee456 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -1,6 +1,4 @@
 obj-y = s390-virtio-bus.o s390-virtio.o
-
-obj-y := $(addprefix ../,$(obj-y))
 obj-y += s390-virtio-hcall.o
 obj-y += sclp.o
 obj-y += event-facility.o
diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c
new file mode 100644
index 0000000..32f63b0
--- /dev/null
+++ b/hw/s390x/s390-virtio-bus.c
@@ -0,0 +1,623 @@
+/*
+ * QEMU S390 virtio target
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "block/block.h"
+#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#include "monitor/monitor.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/virtio.h"
+#include "hw/virtio-rng.h"
+#include "hw/virtio-serial.h"
+#include "hw/virtio-net.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+
+#include "hw/s390x/s390-virtio-bus.h"
+#include "hw/virtio-bus.h"
+
+/* #define DEBUG_S390 */
+
+#ifdef DEBUG_S390
+#define dprintf(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define dprintf(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define VIRTIO_EXT_CODE   0x2603
+
+static const TypeInfo s390_virtio_bus_info = {
+    .name = TYPE_S390_VIRTIO_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = sizeof(VirtIOS390Bus),
+};
+
+static const VirtIOBindings virtio_s390_bindings;
+
+static ram_addr_t s390_virtio_device_num_vq(VirtIOS390Device *dev);
+
+/* length of VirtIO device pages */
+const hwaddr virtio_size = S390_DEVICE_PAGES * TARGET_PAGE_SIZE;
+
+static void s390_virtio_bus_reset(void *opaque)
+{
+    VirtIOS390Bus *bus = opaque;
+    bus->next_ring = bus->dev_page + TARGET_PAGE_SIZE;
+}
+
+void s390_virtio_reset_idx(VirtIOS390Device *dev)
+{
+    int i;
+    hwaddr idx_addr;
+    uint8_t num_vq;
+
+    num_vq = s390_virtio_device_num_vq(dev);
+    for (i = 0; i < num_vq; i++) {
+        idx_addr = virtio_queue_get_avail_addr(dev->vdev, i) +
+            VIRTIO_VRING_AVAIL_IDX_OFFS;
+        stw_phys(idx_addr, 0);
+        idx_addr = virtio_queue_get_used_addr(dev->vdev, i) +
+            VIRTIO_VRING_USED_IDX_OFFS;
+        stw_phys(idx_addr, 0);
+    }
+}
+
+VirtIOS390Bus *s390_virtio_bus_init(ram_addr_t *ram_size)
+{
+    VirtIOS390Bus *bus;
+    BusState *_bus;
+    DeviceState *dev;
+
+    /* Create bridge device */
+    dev = qdev_create(NULL, "s390-virtio-bridge");
+    qdev_init_nofail(dev);
+
+    /* Create bus on bridge device */
+
+    _bus = qbus_create(TYPE_S390_VIRTIO_BUS, dev, "s390-virtio");
+    bus = DO_UPCAST(VirtIOS390Bus, bus, _bus);
+
+    bus->dev_page = *ram_size;
+    bus->dev_offs = bus->dev_page;
+    bus->next_ring = bus->dev_page + TARGET_PAGE_SIZE;
+
+    /* Enable hotplugging */
+    _bus->allow_hotplug = 1;
+
+    /* Allocate RAM for VirtIO device pages (descriptors, queues, rings) */
+    *ram_size += S390_DEVICE_PAGES * TARGET_PAGE_SIZE;
+
+    qemu_register_reset(s390_virtio_bus_reset, bus);
+    return bus;
+}
+
+static void s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token)
+{
+    CPUS390XState *env = &cpu->env;
+
+    if (kvm_enabled()) {
+        kvm_s390_virtio_irq(cpu, config_change, token);
+    } else {
+        cpu_inject_ext(env, VIRTIO_EXT_CODE, config_change, token);
+    }
+}
+
+static int s390_virtio_device_init(VirtIOS390Device *dev, VirtIODevice *vdev)
+{
+    VirtIOS390Bus *bus;
+    int dev_len;
+
+    bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
+    dev->vdev = vdev;
+    dev->dev_offs = bus->dev_offs;
+    dev->feat_len = sizeof(uint32_t); /* always keep 32 bits features */
+
+    dev_len = VIRTIO_DEV_OFFS_CONFIG;
+    dev_len += s390_virtio_device_num_vq(dev) * VIRTIO_VQCONFIG_LEN;
+    dev_len += dev->feat_len * 2;
+    dev_len += vdev->config_len;
+
+    bus->dev_offs += dev_len;
+
+    virtio_bind_device(vdev, &virtio_s390_bindings, DEVICE(dev));
+    dev->host_features = vdev->get_features(vdev, dev->host_features);
+    s390_virtio_device_sync(dev);
+    s390_virtio_reset_idx(dev);
+    if (dev->qdev.hotplugged) {
+        S390CPU *cpu = s390_cpu_addr2state(0);
+        s390_virtio_irq(cpu, VIRTIO_PARAM_DEV_ADD, dev->dev_offs);
+    }
+
+    return 0;
+}
+
+static int s390_virtio_net_init(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev;
+
+    vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net);
+    if (!vdev) {
+        return -1;
+    }
+
+    return s390_virtio_device_init(dev, vdev);
+}
+
+static int s390_virtio_blk_init(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev;
+
+    vdev = virtio_blk_init((DeviceState *)dev, &dev->blk);
+    if (!vdev) {
+        return -1;
+    }
+
+    return s390_virtio_device_init(dev, vdev);
+}
+
+static int s390_virtio_serial_init(VirtIOS390Device *dev)
+{
+    VirtIOS390Bus *bus;
+    VirtIODevice *vdev;
+    int r;
+
+    bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
+
+    vdev = virtio_serial_init((DeviceState *)dev, &dev->serial);
+    if (!vdev) {
+        return -1;
+    }
+
+    r = s390_virtio_device_init(dev, vdev);
+    if (!r) {
+        bus->console = dev;
+    }
+
+    return r;
+}
+
+static int s390_virtio_scsi_init(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev;
+
+    vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi);
+    if (!vdev) {
+        return -1;
+    }
+
+    return s390_virtio_device_init(dev, vdev);
+}
+
+static int s390_virtio_rng_init(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev;
+
+    vdev = virtio_rng_init((DeviceState *)dev, &dev->rng);
+    if (!vdev) {
+        return -1;
+    }
+
+    return s390_virtio_device_init(dev, vdev);
+}
+
+static uint64_t s390_virtio_device_vq_token(VirtIOS390Device *dev, int vq)
+{
+    ram_addr_t token_off;
+
+    token_off = (dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG) +
+                (vq * VIRTIO_VQCONFIG_LEN) +
+                VIRTIO_VQCONFIG_OFFS_TOKEN;
+
+    return ldq_be_phys(token_off);
+}
+
+static ram_addr_t s390_virtio_device_num_vq(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+    int num_vq;
+
+    for (num_vq = 0; num_vq < VIRTIO_PCI_QUEUE_MAX; num_vq++) {
+        if (!virtio_queue_get_num(vdev, num_vq)) {
+            break;
+        }
+    }
+
+    return num_vq;
+}
+
+static ram_addr_t s390_virtio_next_ring(VirtIOS390Bus *bus)
+{
+    ram_addr_t r = bus->next_ring;
+
+    bus->next_ring += VIRTIO_RING_LEN;
+    return r;
+}
+
+void s390_virtio_device_sync(VirtIOS390Device *dev)
+{
+    VirtIOS390Bus *bus = DO_UPCAST(VirtIOS390Bus, bus, dev->qdev.parent_bus);
+    ram_addr_t cur_offs;
+    uint8_t num_vq;
+    int i;
+
+    virtio_reset(dev->vdev);
+
+    /* Sync dev space */
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_TYPE, dev->vdev->device_id);
+
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, s390_virtio_device_num_vq(dev));
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_FEATURE_LEN, dev->feat_len);
+
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG_LEN, dev->vdev->config_len);
+
+    num_vq = s390_virtio_device_num_vq(dev);
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, num_vq);
+
+    /* Sync virtqueues */
+    for (i = 0; i < num_vq; i++) {
+        ram_addr_t vq = (dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG) +
+                        (i * VIRTIO_VQCONFIG_LEN);
+        ram_addr_t vring;
+
+        vring = s390_virtio_next_ring(bus);
+        virtio_queue_set_addr(dev->vdev, i, vring);
+        virtio_queue_set_vector(dev->vdev, i, i);
+        stq_be_phys(vq + VIRTIO_VQCONFIG_OFFS_ADDRESS, vring);
+        stw_be_phys(vq + VIRTIO_VQCONFIG_OFFS_NUM, virtio_queue_get_num(dev->vdev, i));
+    }
+
+    cur_offs = dev->dev_offs;
+    cur_offs += VIRTIO_DEV_OFFS_CONFIG;
+    cur_offs += num_vq * VIRTIO_VQCONFIG_LEN;
+
+    /* Sync feature bitmap */
+    stl_le_phys(cur_offs, dev->host_features);
+
+    dev->feat_offs = cur_offs + dev->feat_len;
+    cur_offs += dev->feat_len * 2;
+
+    /* Sync config space */
+    if (dev->vdev->get_config) {
+        dev->vdev->get_config(dev->vdev, dev->vdev->config);
+    }
+
+    cpu_physical_memory_write(cur_offs,
+                              dev->vdev->config, dev->vdev->config_len);
+    cur_offs += dev->vdev->config_len;
+}
+
+void s390_virtio_device_update_status(VirtIOS390Device *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+    uint32_t features;
+
+    virtio_set_status(vdev, ldub_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS));
+
+    /* Update guest supported feature bitmap */
+
+    features = bswap32(ldl_be_phys(dev->feat_offs));
+    virtio_set_features(vdev, features);
+}
+
+VirtIOS390Device *s390_virtio_bus_console(VirtIOS390Bus *bus)
+{
+    return bus->console;
+}
+
+/* Find a device by vring address */
+VirtIOS390Device *s390_virtio_bus_find_vring(VirtIOS390Bus *bus,
+                                             ram_addr_t mem,
+                                             int *vq_num)
+{
+    BusChild *kid;
+    int i;
+
+    QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
+        VirtIOS390Device *dev = (VirtIOS390Device *)kid->child;
+
+        for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+            if (!virtio_queue_get_addr(dev->vdev, i))
+                break;
+            if (virtio_queue_get_addr(dev->vdev, i) == mem) {
+                if (vq_num) {
+                    *vq_num = i;
+                }
+                return dev;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/* Find a device by device descriptor location */
+VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem)
+{
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
+        VirtIOS390Device *dev = (VirtIOS390Device *)kid->child;
+        if (dev->dev_offs == mem) {
+            return dev;
+        }
+    }
+
+    return NULL;
+}
+
+/* DeviceState to VirtIOS390Device. Note: used on datapath,
+ * be careful and test performance if you change this.
+ */
+static inline VirtIOS390Device *to_virtio_s390_device_fast(DeviceState *d)
+{
+    return container_of(d, VirtIOS390Device, qdev);
+}
+
+/* DeviceState to VirtIOS390Device. TODO: use QOM. */
+static inline VirtIOS390Device *to_virtio_s390_device(DeviceState *d)
+{
+    return container_of(d, VirtIOS390Device, qdev);
+}
+
+static void virtio_s390_notify(DeviceState *d, uint16_t vector)
+{
+    VirtIOS390Device *dev = to_virtio_s390_device_fast(d);
+    uint64_t token = s390_virtio_device_vq_token(dev, vector);
+    S390CPU *cpu = s390_cpu_addr2state(0);
+
+    s390_virtio_irq(cpu, 0, token);
+}
+
+static unsigned virtio_s390_get_features(DeviceState *d)
+{
+    VirtIOS390Device *dev = to_virtio_s390_device(d);
+    return dev->host_features;
+}
+
+/**************** S390 Virtio Bus Device Descriptions *******************/
+
+static const VirtIOBindings virtio_s390_bindings = {
+    .notify = virtio_s390_notify,
+    .get_features = virtio_s390_get_features,
+};
+
+static Property s390_virtio_net_properties[] = {
+    DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic),
+    DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device,
+                       net.txtimer, TX_TIMER_INTERVAL),
+    DEFINE_PROP_INT32("x-txburst", VirtIOS390Device,
+                      net.txburst, TX_BURST),
+    DEFINE_PROP_STRING("tx", VirtIOS390Device, net.tx),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_virtio_net_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_net_init;
+    dc->props = s390_virtio_net_properties;
+}
+
+static const TypeInfo s390_virtio_net = {
+    .name          = "virtio-net-s390",
+    .parent        = TYPE_VIRTIO_S390_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .class_init    = s390_virtio_net_class_init,
+};
+
+static Property s390_virtio_blk_properties[] = {
+    DEFINE_BLOCK_PROPERTIES(VirtIOS390Device, blk.conf),
+    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOS390Device, blk.conf),
+    DEFINE_PROP_STRING("serial", VirtIOS390Device, blk.serial),
+#ifdef __linux__
+    DEFINE_PROP_BIT("scsi", VirtIOS390Device, blk.scsi, 0, true),
+#endif
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_virtio_blk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_blk_init;
+    dc->props = s390_virtio_blk_properties;
+}
+
+static const TypeInfo s390_virtio_blk = {
+    .name          = "virtio-blk-s390",
+    .parent        = TYPE_VIRTIO_S390_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .class_init    = s390_virtio_blk_class_init,
+};
+
+static Property s390_virtio_serial_properties[] = {
+    DEFINE_PROP_UINT32("max_ports", VirtIOS390Device,
+                       serial.max_virtserial_ports, 31),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_virtio_serial_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_serial_init;
+    dc->props = s390_virtio_serial_properties;
+}
+
+static const TypeInfo s390_virtio_serial = {
+    .name          = "virtio-serial-s390",
+    .parent        = TYPE_VIRTIO_S390_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .class_init    = s390_virtio_serial_class_init,
+};
+
+static void s390_virtio_rng_initfn(Object *obj)
+{
+    VirtIOS390Device *dev = VIRTIO_S390_DEVICE(obj);
+
+    object_property_add_link(obj, "rng", TYPE_RNG_BACKEND,
+                             (Object **)&dev->rng.rng, NULL);
+}
+
+static void s390_virtio_rng_class_init(ObjectClass *klass, void *data)
+{
+    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_rng_init;
+}
+
+static const TypeInfo s390_virtio_rng = {
+    .name          = "virtio-rng-s390",
+    .parent        = TYPE_VIRTIO_S390_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .instance_init = s390_virtio_rng_initfn,
+    .class_init    = s390_virtio_rng_class_init,
+};
+
+static int s390_virtio_busdev_init(DeviceState *dev)
+{
+    VirtIOS390Device *_dev = (VirtIOS390Device *)dev;
+    VirtIOS390DeviceClass *_info = VIRTIO_S390_DEVICE_GET_CLASS(dev);
+
+    virtio_s390_bus_new(&_dev->bus, _dev);
+
+    return _info->init(_dev);
+}
+
+static void s390_virtio_busdev_reset(DeviceState *dev)
+{
+    VirtIOS390Device *_dev = (VirtIOS390Device *)dev;
+
+    virtio_reset(_dev->vdev);
+}
+
+static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->init = s390_virtio_busdev_init;
+    dc->bus_type = TYPE_S390_VIRTIO_BUS;
+    dc->unplug = qdev_simple_unplug_cb;
+    dc->reset = s390_virtio_busdev_reset;
+}
+
+static const TypeInfo virtio_s390_device_info = {
+    .name = TYPE_VIRTIO_S390_DEVICE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .class_init = virtio_s390_device_class_init,
+    .class_size = sizeof(VirtIOS390DeviceClass),
+    .abstract = true,
+};
+
+static Property s390_virtio_scsi_properties[] = {
+    DEFINE_VIRTIO_SCSI_PROPERTIES(VirtIOS390Device, host_features, scsi),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void s390_virtio_scsi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOS390DeviceClass *k = VIRTIO_S390_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_scsi_init;
+    dc->props = s390_virtio_scsi_properties;
+}
+
+static const TypeInfo s390_virtio_scsi = {
+    .name          = "virtio-scsi-s390",
+    .parent        = TYPE_VIRTIO_S390_DEVICE,
+    .instance_size = sizeof(VirtIOS390Device),
+    .class_init    = s390_virtio_scsi_class_init,
+};
+
+/***************** S390 Virtio Bus Bridge Device *******************/
+/* Only required to have the virtio bus as child in the system bus */
+
+static int s390_virtio_bridge_init(SysBusDevice *dev)
+{
+    /* nothing */
+    return 0;
+}
+
+static void s390_virtio_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = s390_virtio_bridge_init;
+    dc->no_user = 1;
+}
+
+static const TypeInfo s390_virtio_bridge_info = {
+    .name          = "s390-virtio-bridge",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SysBusDevice),
+    .class_init    = s390_virtio_bridge_class_init,
+};
+
+/* virtio-s390-bus */
+
+void virtio_s390_bus_new(VirtioBusState *bus, VirtIOS390Device *dev)
+{
+    DeviceState *qdev = DEVICE(dev);
+    BusState *qbus;
+    qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_S390_BUS, qdev, NULL);
+    qbus = BUS(bus);
+    qbus->allow_hotplug = 0;
+}
+
+static void virtio_s390_bus_class_init(ObjectClass *klass, void *data)
+{
+    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
+    BusClass *bus_class = BUS_CLASS(klass);
+    bus_class->max_dev = 1;
+    k->notify = virtio_s390_notify;
+    k->get_features = virtio_s390_get_features;
+}
+
+static const TypeInfo virtio_s390_bus_info = {
+    .name          = TYPE_VIRTIO_S390_BUS,
+    .parent        = TYPE_VIRTIO_BUS,
+    .instance_size = sizeof(VirtioS390BusState),
+    .class_init    = virtio_s390_bus_class_init,
+};
+
+static void s390_virtio_register_types(void)
+{
+    type_register_static(&virtio_s390_bus_info);
+    type_register_static(&s390_virtio_bus_info);
+    type_register_static(&virtio_s390_device_info);
+    type_register_static(&s390_virtio_serial);
+    type_register_static(&s390_virtio_blk);
+    type_register_static(&s390_virtio_net);
+    type_register_static(&s390_virtio_scsi);
+    type_register_static(&s390_virtio_rng);
+    type_register_static(&s390_virtio_bridge_info);
+}
+
+type_init(s390_virtio_register_types)
diff --git a/hw/s390x/s390-virtio-bus.h b/hw/s390x/s390-virtio-bus.h
new file mode 100644
index 0000000..4aacf83
--- /dev/null
+++ b/hw/s390x/s390-virtio-bus.h
@@ -0,0 +1,124 @@
+/*
+ * QEMU S390x VirtIO BUS definitions
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef HW_S390_VIRTIO_BUS_H
+#define HW_S390_VIRTIO_BUS_H 1
+
+#include "hw/virtio-blk.h"
+#include "hw/virtio-net.h"
+#include "hw/virtio-rng.h"
+#include "hw/virtio-serial.h"
+#include "hw/virtio-scsi.h"
+#include "hw/virtio-bus.h"
+
+#define VIRTIO_DEV_OFFS_TYPE		0	/* 8 bits */
+#define VIRTIO_DEV_OFFS_NUM_VQ		1	/* 8 bits */
+#define VIRTIO_DEV_OFFS_FEATURE_LEN	2	/* 8 bits */
+#define VIRTIO_DEV_OFFS_CONFIG_LEN	3	/* 8 bits */
+#define VIRTIO_DEV_OFFS_STATUS		4	/* 8 bits */
+#define VIRTIO_DEV_OFFS_CONFIG		5	/* dynamic */
+
+#define VIRTIO_VQCONFIG_OFFS_TOKEN	0	/* 64 bits */
+#define VIRTIO_VQCONFIG_OFFS_ADDRESS	8	/* 64 bits */
+#define VIRTIO_VQCONFIG_OFFS_NUM	16	/* 16 bits */
+#define VIRTIO_VQCONFIG_LEN		24
+
+#define VIRTIO_RING_LEN			(TARGET_PAGE_SIZE * 3)
+#define VIRTIO_VRING_AVAIL_IDX_OFFS 2
+#define VIRTIO_VRING_USED_IDX_OFFS 2
+#define S390_DEVICE_PAGES		512
+
+#define VIRTIO_PARAM_MASK               0xff
+#define VIRTIO_PARAM_VRING_INTERRUPT    0x0
+#define VIRTIO_PARAM_CONFIG_CHANGED     0x1
+#define VIRTIO_PARAM_DEV_ADD            0x2
+
+#define TYPE_VIRTIO_S390_DEVICE "virtio-s390-device"
+#define VIRTIO_S390_DEVICE(obj) \
+     OBJECT_CHECK(VirtIOS390Device, (obj), TYPE_VIRTIO_S390_DEVICE)
+#define VIRTIO_S390_DEVICE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(VirtIOS390DeviceClass, (klass), TYPE_VIRTIO_S390_DEVICE)
+#define VIRTIO_S390_DEVICE_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(VirtIOS390DeviceClass, (obj), TYPE_VIRTIO_S390_DEVICE)
+
+#define TYPE_S390_VIRTIO_BUS "s390-virtio-bus"
+#define S390_VIRTIO_BUS(obj) \
+     OBJECT_CHECK(VirtIOS390Bus, (obj), TYPE_S390_VIRTIO_BUS)
+
+/* virtio-s390-bus */
+
+typedef struct VirtioBusState VirtioS390BusState;
+typedef struct VirtioBusClass VirtioS390BusClass;
+
+#define TYPE_VIRTIO_S390_BUS "virtio-s390-bus"
+#define VIRTIO_S390_BUS(obj) \
+        OBJECT_CHECK(VirtioS390BusState, (obj), TYPE_VIRTIO_S390_BUS)
+#define VIRTIO_S390_BUS_GET_CLASS(obj) \
+        OBJECT_GET_CLASS(VirtioS390BusClass, obj, TYPE_VIRTIO_S390_BUS)
+#define VIRTIO_S390_BUS_CLASS(klass) \
+        OBJECT_CLASS_CHECK(VirtioS390BusClass, klass, TYPE_VIRTIO_S390_BUS)
+
+
+typedef struct VirtIOS390Device VirtIOS390Device;
+
+void virtio_s390_bus_new(VirtioBusState *bus, VirtIOS390Device *dev);
+
+typedef struct VirtIOS390DeviceClass {
+    DeviceClass qdev;
+    int (*init)(VirtIOS390Device *dev);
+} VirtIOS390DeviceClass;
+
+struct VirtIOS390Device {
+    DeviceState qdev;
+    ram_addr_t dev_offs;
+    ram_addr_t feat_offs;
+    uint8_t feat_len;
+    VirtIODevice *vdev;
+    VirtIOBlkConf blk;
+    NICConf nic;
+    uint32_t host_features;
+    virtio_serial_conf serial;
+    virtio_net_conf net;
+    VirtIOSCSIConf scsi;
+    VirtIORNGConf rng;
+    VirtioBusState bus;
+};
+
+typedef struct VirtIOS390Bus {
+    BusState bus;
+
+    VirtIOS390Device *console;
+    ram_addr_t dev_page;
+    ram_addr_t dev_offs;
+    ram_addr_t next_ring;
+} VirtIOS390Bus;
+
+
+void s390_virtio_device_update_status(VirtIOS390Device *dev);
+
+VirtIOS390Device *s390_virtio_bus_console(VirtIOS390Bus *bus);
+VirtIOS390Bus *s390_virtio_bus_init(ram_addr_t *ram_size);
+
+VirtIOS390Device *s390_virtio_bus_find_vring(VirtIOS390Bus *bus,
+                                             ram_addr_t mem, int *vq_num);
+VirtIOS390Device *s390_virtio_bus_find_mem(VirtIOS390Bus *bus, ram_addr_t mem);
+void s390_virtio_device_sync(VirtIOS390Device *dev);
+void s390_virtio_reset_idx(VirtIOS390Device *dev);
+
+
+#endif
diff --git a/hw/s390x/s390-virtio-hcall.c b/hw/s390x/s390-virtio-hcall.c
index d7938c0..ee62649 100644
--- a/hw/s390x/s390-virtio-hcall.c
+++ b/hw/s390x/s390-virtio-hcall.c
@@ -10,7 +10,7 @@
  */
 
 #include "cpu.h"
-#include "hw/s390-virtio.h"
+#include "hw/s390x/s390-virtio.h"
 
 #define MAX_DIAG_SUBCODES 255
 
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
new file mode 100644
index 0000000..a8a489d
--- /dev/null
+++ b/hw/s390x/s390-virtio.c
@@ -0,0 +1,290 @@
+/*
+ * QEMU S390 virtio target
+ *
+ * Copyright (c) 2009 Alexander Graf <agraf at suse.de>
+ * Copyright IBM Corp 2012
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * Contributions after 2012-10-29 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU (Lesser) General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "block/block.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
+#include "net/net.h"
+#include "hw/boards.h"
+#include "monitor/monitor.h"
+#include "hw/loader.h"
+#include "hw/virtio.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "exec/address-spaces.h"
+
+#include "hw/s390x/s390-virtio-bus.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/s390-virtio.h"
+
+//#define DEBUG_S390
+
+#ifdef DEBUG_S390
+#define dprintf(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else /* !DEBUG_S390: dprintf() expands to an empty statement */
+#define dprintf(fmt, ...) \
+    do { } while (0)
+#endif /* DEBUG_S390 */
+
+#define MAX_BLK_DEVS                    10
+
+static VirtIOS390Bus *s390_bus;
+static S390CPU **ipi_states;
+
+/*
+ * Return the CPU stored for @cpu_addr in the ipi_states table, or NULL when
+ * @cpu_addr is not below smp_cpus.
+ */
+S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
+{
+    return (cpu_addr < smp_cpus) ? ipi_states[cpu_addr] : NULL;
+}
+
+/*
+ * KVM_S390_VIRTIO_NOTIFY handler. args[0] is a guest address: addresses up to
+ * ram_size are accepted and ignored, anything above is looked up as a vring
+ * and the owning queue is notified.
+ *
+ * Returns 0 on success, -EINVAL when no vring matches the address.
+ */
+static int s390_virtio_hcall_notify(const uint64_t *args)
+{
+    uint64_t mem = args[0];
+    VirtIOS390Device *dev;
+    int vq_num;
+
+    if (mem <= ram_size) {
+        /* Early printk */
+        return 0;
+    }
+
+    dev = s390_virtio_bus_find_vring(s390_bus, mem, &vq_num);
+    if (!dev) {
+        return -EINVAL;
+    }
+
+    virtio_queue_notify(dev->vdev, vq_num);
+    return 0;
+}
+
+/*
+ * KVM_S390_VIRTIO_RESET handler: reset the virtio device found at the guest
+ * address in args[0].
+ *
+ * Returns 0 on success, -EINVAL when no device is found at that address
+ * (the same convention the set_status handler uses).
+ */
+static int s390_virtio_hcall_reset(const uint64_t *args)
+{
+    uint64_t mem = args[0];
+    VirtIOS390Device *dev;
+
+    dev = s390_virtio_bus_find_mem(s390_bus, mem);
+    if (dev == NULL) {
+        /* The address comes from the guest; a failed lookup must not be
+         * dereferenced. */
+        return -EINVAL;
+    }
+
+    virtio_reset(dev->vdev);
+    /* Zero the byte at the VIRTIO_DEV_OFFS_STATUS offset of this device. */
+    stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
+    s390_virtio_device_sync(dev);
+    s390_virtio_reset_idx(dev);
+
+    return 0;
+}
+
+/*
+ * KVM_S390_VIRTIO_SET_STATUS handler: look up the device at the guest address
+ * in args[0] and let the bus code update its status.
+ *
+ * Returns 0 on success, -EINVAL when no device is found.
+ */
+static int s390_virtio_hcall_set_status(const uint64_t *args)
+{
+    VirtIOS390Device *dev = s390_virtio_bus_find_mem(s390_bus, args[0]);
+
+    if (!dev) {
+        return -EINVAL;
+    }
+
+    s390_virtio_device_update_status(dev);
+    return 0;
+}
+
+/* Register the three virtio hypercall handlers, in hypercall-number order. */
+static void s390_virtio_register_hcalls(void)
+{
+    static const struct {
+        uint64_t code;
+        s390_virtio_fn fn;
+    } hcalls[] = {
+        { KVM_S390_VIRTIO_NOTIFY,     s390_virtio_hcall_notify },
+        { KVM_S390_VIRTIO_RESET,      s390_virtio_hcall_reset },
+        { KVM_S390_VIRTIO_SET_STATUS, s390_virtio_hcall_set_status },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(hcalls) / sizeof(hcalls[0]); i++) {
+        s390_register_virtio_hypercall(hcalls[i].code, hcalls[i].fn);
+    }
+}
+
+/*
+ * The number of running CPUs. On s390 a shutdown is the state of all CPUs
+ * being either stopped or disabled (for interrupts) waiting. We have to
+ * track this number to call the shutdown sequence accordingly. This
+ * number is modified either on startup or while holding the big qemu lock.
+ */
+static unsigned s390_running_cpus;
+
+/*
+ * Mark a halted CPU as running and count it in s390_running_cpus.
+ * A CPU that is not halted is left untouched.
+ */
+void s390_add_running_cpu(CPUS390XState *env)
+{
+    if (!env->halted) {
+        return;
+    }
+
+    s390_running_cpus += 1;
+    env->halted = 0;
+    env->exception_index = -1;
+}
+
+/*
+ * Mark a running CPU as halted and remove it from the s390_running_cpus
+ * count. An already halted CPU is left untouched.
+ *
+ * Returns the number of CPUs still running after the call.
+ */
+unsigned s390_del_running_cpu(CPUS390XState *env)
+{
+    if (env->halted != 0) {
+        return s390_running_cpus;
+    }
+
+    assert(s390_running_cpus >= 1);
+    s390_running_cpus -= 1;
+    env->halted = 1;
+    env->exception_index = EXCP_HLT;
+
+    return s390_running_cpus;
+}
+
+/*
+ * Create and initialise the "s390-ipl" device.
+ *
+ * The "kernel" and "initrd" properties are only set when the corresponding
+ * file name is non-NULL; "cmdline" is always set from @kernel_cmdline.
+ */
+void s390_init_ipl_dev(const char *kernel_filename,
+                       const char *kernel_cmdline,
+                       const char *initrd_filename)
+{
+    DeviceState *ipl = qdev_create(NULL, "s390-ipl");
+
+    if (kernel_filename != NULL) {
+        qdev_prop_set_string(ipl, "kernel", kernel_filename);
+    }
+    if (initrd_filename != NULL) {
+        qdev_prop_set_string(ipl, "initrd", initrd_filename);
+    }
+    qdev_prop_set_string(ipl, "cmdline", kernel_cmdline);
+
+    qdev_init_nofail(ipl);
+}
+
+/*
+ * Create smp_cpus CPUs of type @cpu_model ("host" when NULL) and record them
+ * in the ipi_states table. Every CPU starts out halted and shares the
+ * @storage_keys array.
+ */
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys)
+{
+    const char *model = cpu_model ? cpu_model : "host";
+    int n;
+
+    ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
+
+    for (n = 0; n < smp_cpus; n++) {
+        S390CPU *cpu = cpu_s390x_init(model);
+
+        cpu->env.halted = 1;
+        cpu->env.exception_index = EXCP_HLT;
+        cpu->env.storage_keys = storage_keys;
+        ipi_states[n] = cpu;
+    }
+}
+
+
+/*
+ * Create one device of type @name on @bus for every configured NIC.
+ *
+ * A NIC without a model defaults to "virtio"; any other model is rejected
+ * with an error message and exit(1).
+ */
+void s390_create_virtio_net(BusState *bus, const char *name)
+{
+    int idx = 0;
+
+    while (idx < nb_nics) {
+        NICInfo *nic = &nd_table[idx++];
+        DeviceState *nic_dev;
+
+        if (nic->model == NULL) {
+            nic->model = g_strdup("virtio");
+        }
+
+        if (strcmp(nic->model, "virtio") != 0) {
+            fprintf(stderr, "S390 only supports VirtIO nics\n");
+            exit(1);
+        }
+
+        nic_dev = qdev_create(bus, name);
+        qdev_set_nic_properties(nic_dev, nic);
+        qdev_init_nofail(nic_dev);
+    }
+}
+
+/* s390-virtio machine initialisation (installed as the QEMUMachine .init hook) */
+static void s390_init(QEMUMachineInitArgs *args)
+{
+    ram_addr_t my_ram_size = args->ram_size;
+    MemoryRegion *sysmem = get_system_memory();
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
+    int shift = 0;
+    uint8_t *storage_keys;
+    void *virtio_region;
+    hwaddr virtio_region_len;
+    hwaddr virtio_region_start;
+
+    /* s390x ram size detection needs a 16bit multiplier + an increment. So
+       guests > 64GB can be specified in 2MB steps etc. */
+    while ((my_ram_size >> (20 + shift)) > 65535) {
+        shift++;
+    }
+    my_ram_size = my_ram_size >> (20 + shift) << (20 + shift); /* round down to a multiple of 2^(20 + shift) */
+
+    /* lets propagate the changed ram size into the global variable. */
+    ram_size = my_ram_size;
+
+    /* get a BUS */
+    s390_bus = s390_virtio_bus_init(&my_ram_size); /* takes the size by pointer; presumably enlarges it for the virtio region */
+    s390_sclp_init();
+    s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
+                      args->initrd_filename);
+
+    /* register hypercalls */
+    s390_virtio_register_hcalls();
+
+    /* allocate RAM */
+    memory_region_init_ram(ram, "s390.ram", my_ram_size);
+    vmstate_register_ram_global(ram);
+    memory_region_add_subregion(sysmem, 0, ram);
+
+    /* clear virtio region */
+    virtio_region_len = my_ram_size - ram_size; /* the part of the allocation above the guest-visible ram_size */
+    virtio_region_start = ram_size;
+    virtio_region = cpu_physical_memory_map(virtio_region_start,
+                                            &virtio_region_len, true);
+    memset(virtio_region, 0, virtio_region_len); /* NOTE(review): the map result is used without a NULL check */
+    cpu_physical_memory_unmap(virtio_region, virtio_region_len, 1,
+                              virtio_region_len);
+
+    /* allocate storage keys */
+    storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE); /* one zeroed byte per target page */
+
+    /* init CPUs */
+    s390_init_cpus(args->cpu_model, storage_keys);
+
+    /* Create VirtIO network adapters */
+    s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390");
+}
+
+static QEMUMachine s390_machine = {
+    .name = "s390-virtio",
+    .alias = "s390",
+    .desc = "VirtIO based S390 machine",
+    .init = s390_init, /* board setup function defined above */
+    .block_default_type = IF_VIRTIO,
+    .no_cdrom = 1,
+    .no_floppy = 1,
+    .no_serial = 1,
+    .no_parallel = 1,
+    .no_sdcard = 1,
+    .use_virtcon = 1,
+    .max_cpus = 255,
+    .is_default = 1, /* flagged as the default machine */
+    DEFAULT_MACHINE_OPTIONS,
+};
+
+static void s390_machine_init(void)
+{
+    qemu_register_machine(&s390_machine); /* make the "s390-virtio" description known to the machine registry */
+}
+
+machine_init(s390_machine_init); /* hooks s390_machine_init() into QEMU's machine_init mechanism */
diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h
new file mode 100644
index 0000000..67bfd20
--- /dev/null
+++ b/hw/s390x/s390-virtio.h
@@ -0,0 +1,27 @@
+/*
+ * Virtio interfaces for s390
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_VIRTIO_H
+#define HW_S390_VIRTIO_H 1
+
+#define KVM_S390_VIRTIO_NOTIFY          0 /* s390-virtio.c registers s390_virtio_hcall_notify() for it */
+#define KVM_S390_VIRTIO_RESET           1 /* s390-virtio.c registers s390_virtio_hcall_reset() for it */
+#define KVM_S390_VIRTIO_SET_STATUS      2 /* s390-virtio.c registers s390_virtio_hcall_set_status() for it */
+
+typedef int (*s390_virtio_fn)(const uint64_t *args);
+void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
+
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys);
+void s390_init_ipl_dev(const char *kernel_filename,
+                       const char *kernel_cmdline,
+                       const char *initrd_filename);
+void s390_create_virtio_net(BusState *bus, const char *name);
+#endif
commit 93726cb31982f5d930a4d5838215307390636d9b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Jan 10 04:40:25 2013 +0000

    virtio-s390: add a reset function to virtio-s390 devices
    
    virtio-s390 devices are not being reset when their bus is.  To fix
    this, add a reset method that forwards to virtio_reset.  This is
    only needed because of the "strange" modeling of virtio devices;
    the ->vdev link is being handled manually rather than through qdev.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index b5d1f2b..6858db0 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -508,6 +508,13 @@ static int s390_virtio_busdev_init(DeviceState *dev)
     return _info->init(_dev);
 }
 
+/* qdev reset hook: forward the reset to the attached virtio device. */
+static void s390_virtio_busdev_reset(DeviceState *dev)
+{
+    VirtIOS390Device *s390_dev = (VirtIOS390Device *)dev;
+    virtio_reset(s390_dev->vdev);
+}
+
 static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -515,6 +522,7 @@ static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
     dc->init = s390_virtio_busdev_init;
     dc->bus_type = TYPE_S390_VIRTIO_BUS;
     dc->unplug = qdev_simple_unplug_cb;
+    dc->reset = s390_virtio_busdev_reset;
 }
 
 static const TypeInfo virtio_s390_device_info = {
commit 49973ebc039f644fce3e73ff8019efaa795bd83b
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Jan 24 19:11:26 2013 +0100

    s390: Make typeinfo const
    
    All TypeInfo definitions should be const.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 7cbbf99..86e8415 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -159,7 +159,7 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data)
     dc->no_user = 1;
 }
 
-static TypeInfo s390_ipl_info = {
+static const TypeInfo s390_ipl_info = {
     .class_init = s390_ipl_class_init,
     .parent = TYPE_SYS_BUS_DEVICE,
     .name  = "s390-ipl",
commit a5cf2bb4e3827732b1b6740bddd022eb19988e0a
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 06:08:55 2013 +0000

    s390: Add new channel I/O based virtio transport.
    
    Add a new virtio transport that uses channel commands to perform
    virtio operations.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index ab99da6..f6b461b 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -7,3 +7,4 @@ obj-y += event-facility.o
 obj-y += sclpquiesce.o sclpconsole.o
 obj-y += ipl.o
 obj-y += css.o
+obj-y += virtio-ccw.o
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
new file mode 100644
index 0000000..8c9b745
--- /dev/null
+++ b/hw/s390x/virtio-ccw.c
@@ -0,0 +1,960 @@
+/*
+ * virtio ccw target implementation
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "hw/hw.h"
+#include "block/block.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
+#include "net/net.h"
+#include "monitor/monitor.h"
+#include "hw/virtio.h"
+#include "hw/virtio-serial.h"
+#include "hw/virtio-net.h"
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "hw/virtio-bus.h"
+
+#include "ioinst.h"
+#include "css.h"
+#include "virtio-ccw.h"
+#include "trace.h"
+
+/*
+ * Bus reset hook for the virtual css bus: resets the whole channel
+ * subsystem through css_reset().
+ *
+ * Always returns 0, as this hook does not traverse the bus itself.
+ */
+static int virtual_css_bus_reset(BusState *qbus)
+{
+    /* This should actually be modelled via the generic css */
+    css_reset();
+
+    return 0;
+}
+
+
+/* Class initialiser: install the reset hook on the virtual css bus class. */
+static void virtual_css_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *bus_class = BUS_CLASS(klass);
+
+    bus_class->reset = virtual_css_bus_reset;
+}
+
+static const TypeInfo virtual_css_bus_info = {
+    .name = TYPE_VIRTUAL_CSS_BUS, /* type name also passed to qbus_create() in virtual_css_bus_init() */
+    .parent = TYPE_BUS,
+    .instance_size = sizeof(VirtualCssBus),
+    .class_init = virtual_css_bus_class_init, /* installs the bus reset hook */
+};
+
+static const VirtIOBindings virtio_ccw_bindings;
+
+/*
+ * Return the virtio device behind subchannel @sch, or NULL when the
+ * subchannel has no driver data attached.
+ */
+VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch)
+{
+    VirtioCcwDevice *dev = sch->driver_data;
+
+    return dev ? dev->vdev : NULL;
+}
+
+/*
+ * Create the "virtual-css-bridge" device, put a hotplug-capable
+ * "virtual-css" bus on it and return that bus.
+ */
+VirtualCssBus *virtual_css_bus_init(void)
+{
+    DeviceState *bridge;
+    BusState *bus;
+
+    /* Create bridge device */
+    bridge = qdev_create(NULL, "virtual-css-bridge");
+    qdev_init_nofail(bridge);
+
+    /* Create bus on bridge device */
+    bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, bridge, "virtual-css");
+
+    /* Enable hotplugging */
+    bus->allow_hotplug = 1;
+
+    return VIRTUAL_CSS_BUS(bus);
+}
+
+/* Communication blocks used by several channel commands. */
+typedef struct VqInfoBlock {
+    uint64_t queue; /* guest address of the virtqueue; 0 leads to the vector being set to 0 */
+    uint32_t align; /* requested alignment; must be 4096 when queue is non-zero */
+    uint16_t index; /* virtqueue index */
+    uint16_t num;   /* queue size offered by the guest */
+} QEMU_PACKED VqInfoBlock;
+
+typedef struct VqConfigBlock {
+    uint16_t index;   /* virtqueue index, read from guest memory */
+    uint16_t num_max; /* filled from virtio_queue_get_num() and written back to the guest */
+} QEMU_PACKED VqConfigBlock;
+
+typedef struct VirtioFeatDesc {
+    uint32_t features; /* one 32-bit word of feature bits, accessed little-endian in guest memory */
+    uint8_t index;     /* which feature word is meant; used to index host_features[] */
+} QEMU_PACKED VirtioFeatDesc;
+
+/*
+ * Specify where the virtqueues for the subchannel are in guest memory.
+ *
+ * @addr:  guest address of the queue; 0 sets the queue's vector to 0
+ * @align: alignment requested by the guest; must be 4096 when @addr is set
+ * @index: virtqueue index; valid values are 0 .. VIRTIO_PCI_QUEUE_MAX - 1
+ * @num:   queue size offered by the guest
+ *
+ * Returns 0 on success or -EINVAL for an out-of-range index, an unsupported
+ * alignment, a missing device or a guest queue that is too small.
+ */
+static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align,
+                              uint16_t index, uint16_t num)
+{
+    VirtioCcwDevice *dev = sch->driver_data;
+
+    /*
+     * The index is passed straight to the per-queue accessors below and
+     * VIRTIO_PCI_QUEUE_MAX itself doubles as the config-change vector
+     * (see the end of this function), so it is not a valid queue index.
+     */
+    if (index >= VIRTIO_PCI_QUEUE_MAX) {
+        return -EINVAL;
+    }
+
+    /* Current code in virtio.c relies on 4K alignment. */
+    if (addr && (align != 4096)) {
+        return -EINVAL;
+    }
+
+    if (!dev) {
+        return -EINVAL;
+    }
+
+    virtio_queue_set_addr(dev->vdev, index, addr);
+    if (!addr) {
+        virtio_queue_set_vector(dev->vdev, index, 0);
+    } else {
+        /* Fail if we don't have a big enough queue. */
+        /* TODO: Add interface to handle vring.num changing */
+        if (virtio_queue_get_num(dev->vdev, index) > num) {
+            return -EINVAL;
+        }
+        virtio_queue_set_vector(dev->vdev, index, index);
+    }
+    /* tell notify handler in case of config change */
+    dev->vdev->config_vector = VIRTIO_PCI_QUEUE_MAX;
+    return 0;
+}
+
+/*
+ * Check a CCW byte count against the size of the block a command expects.
+ * With length checking enabled the count must match exactly; without it a
+ * larger count is tolerated, but a smaller one still cannot be executed.
+ */
+static bool virtio_ccw_count_ok(bool check_len, size_t count, size_t expected)
+{
+    return check_len ? (count == expected) : (count >= expected);
+}
+
+/*
+ * Interpret one channel command word for a virtio-ccw subchannel.
+ *
+ * Returns 0 on success, -EINVAL for a bad count/index or a subchannel
+ * without a device, -EFAULT for a zero data address (or zero indicator
+ * address) and -EOPNOTSUPP for an unknown command code. On success the
+ * residual count is stored in sch->curr_status.scsw.count where the command
+ * transfers data.
+ */
+static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
+{
+    int ret;
+    VqInfoBlock info;
+    uint8_t status;
+    VirtioFeatDesc features;
+    void *config;
+    hwaddr indicators;
+    VqConfigBlock vq_config;
+    VirtioCcwDevice *dev = sch->driver_data;
+    bool check_len;
+    int len;
+    hwaddr hw_len;
+
+    if (!dev) {
+        return -EINVAL;
+    }
+
+    trace_virtio_ccw_interpret_ccw(sch->cssid, sch->ssid, sch->schid,
+                                   ccw.cmd_code);
+    /* Length checking is only waived when SLI is set without data chaining. */
+    check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
+
+    /* Look at the command. */
+    switch (ccw.cmd_code) {
+    case CCW_CMD_SET_VQ:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(info))) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        /* Read the packed VqInfoBlock field by field from guest memory. */
+        info.queue = ldq_phys(ccw.cda);
+        info.align = ldl_phys(ccw.cda + sizeof(info.queue));
+        info.index = lduw_phys(ccw.cda + sizeof(info.queue)
+                               + sizeof(info.align));
+        info.num = lduw_phys(ccw.cda + sizeof(info.queue)
+                             + sizeof(info.align)
+                             + sizeof(info.index));
+        ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index,
+                                 info.num);
+        sch->curr_status.scsw.count = 0;
+        break;
+    case CCW_CMD_VDEV_RESET:
+        virtio_reset(dev->vdev);
+        ret = 0;
+        break;
+    case CCW_CMD_READ_FEAT:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(features))) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        features.index = ldub_phys(ccw.cda + sizeof(features.features));
+        if (features.index < ARRAY_SIZE(dev->host_features)) {
+            features.features = dev->host_features[features.index];
+        } else {
+            /* Return zeroes if the guest supports more feature bits. */
+            features.features = 0;
+        }
+        stl_le_phys(ccw.cda, features.features);
+        sch->curr_status.scsw.count = ccw.count - sizeof(features);
+        ret = 0;
+        break;
+    case CCW_CMD_WRITE_FEAT:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(features))) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        features.index = ldub_phys(ccw.cda + sizeof(features.features));
+        features.features = ldl_le_phys(ccw.cda);
+        if (features.index < ARRAY_SIZE(dev->host_features)) {
+            if (dev->vdev->set_features) {
+                dev->vdev->set_features(dev->vdev, features.features);
+            }
+            dev->vdev->guest_features = features.features;
+        } else {
+            /*
+             * If the guest supports more feature bits, assert that it
+             * passes us zeroes for those we don't support.
+             */
+            if (features.features) {
+                fprintf(stderr, "Guest bug: features[%i]=%x (expected 0)\n",
+                        features.index, features.features);
+                /* XXX: do a unit check here? */
+            }
+        }
+        sch->curr_status.scsw.count = ccw.count - sizeof(features);
+        ret = 0;
+        break;
+    case CCW_CMD_READ_CONF:
+        if (check_len && ccw.count > dev->vdev->config_len) {
+            ret = -EINVAL;
+            break;
+        }
+        len = MIN(ccw.count, dev->vdev->config_len);
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        dev->vdev->get_config(dev->vdev, dev->vdev->config);
+        /* XXX config space endianness */
+        cpu_physical_memory_write(ccw.cda, dev->vdev->config, len);
+        sch->curr_status.scsw.count = ccw.count - len;
+        ret = 0;
+        break;
+    case CCW_CMD_WRITE_CONF:
+        if (check_len && ccw.count > dev->vdev->config_len) {
+            ret = -EINVAL;
+            break;
+        }
+        len = MIN(ccw.count, dev->vdev->config_len);
+        hw_len = len;
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        config = cpu_physical_memory_map(ccw.cda, &hw_len, 0);
+        if (!config) {
+            ret = -EFAULT;
+            break;
+        }
+        /* The mapping may be shorter than requested; copy only that much. */
+        len = hw_len;
+        /* XXX config space endianness */
+        memcpy(dev->vdev->config, config, len);
+        cpu_physical_memory_unmap(config, hw_len, 0, hw_len);
+        if (dev->vdev->set_config) {
+            dev->vdev->set_config(dev->vdev, dev->vdev->config);
+        }
+        sch->curr_status.scsw.count = ccw.count - len;
+        ret = 0;
+        break;
+    case CCW_CMD_WRITE_STATUS:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(status))) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        status = ldub_phys(ccw.cda);
+        virtio_set_status(dev->vdev, status);
+        if (dev->vdev->status == 0) {
+            virtio_reset(dev->vdev);
+        }
+        sch->curr_status.scsw.count = ccw.count - sizeof(status);
+        ret = 0;
+        break;
+    case CCW_CMD_SET_IND:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(indicators))) {
+            ret = -EINVAL;
+            break;
+        }
+        /* Like every other command, never read the payload from address 0. */
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        indicators = ldq_phys(ccw.cda);
+        if (!indicators) {
+            ret = -EFAULT;
+            break;
+        }
+        dev->indicators = indicators;
+        sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
+        ret = 0;
+        break;
+    case CCW_CMD_SET_CONF_IND:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(indicators))) {
+            ret = -EINVAL;
+            break;
+        }
+        /* Like every other command, never read the payload from address 0. */
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        indicators = ldq_phys(ccw.cda);
+        if (!indicators) {
+            ret = -EFAULT;
+            break;
+        }
+        dev->indicators2 = indicators;
+        sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
+        ret = 0;
+        break;
+    case CCW_CMD_READ_VQ_CONF:
+        if (!virtio_ccw_count_ok(check_len, ccw.count, sizeof(vq_config))) {
+            ret = -EINVAL;
+            break;
+        }
+        if (!ccw.cda) {
+            ret = -EFAULT;
+            break;
+        }
+        vq_config.index = lduw_phys(ccw.cda);
+        /* The index is guest-supplied; reject it before it is used to
+         * look up a queue. */
+        if (vq_config.index >= VIRTIO_PCI_QUEUE_MAX) {
+            ret = -EINVAL;
+            break;
+        }
+        vq_config.num_max = virtio_queue_get_num(dev->vdev,
+                                                 vq_config.index);
+        stw_phys(ccw.cda + sizeof(vq_config.index), vq_config.num_max);
+        sch->curr_status.scsw.count = ccw.count - sizeof(vq_config);
+        ret = 0;
+        break;
+    default:
+        ret = -EOPNOTSUPP;
+        break;
+    }
+    return ret;
+}
+
+static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev) /* returns 0, or a negative errno after freeing the subchannel */
+{
+    unsigned int cssid = 0;
+    unsigned int ssid = 0;
+    unsigned int schid;
+    unsigned int devno;
+    bool have_devno = false; /* true once a user-supplied "devno" was parsed and accepted */
+    bool found = false; /* true once a free subchannel id was assigned */
+    SubchDev *sch;
+    int ret;
+    int num;
+    DeviceState *parent = DEVICE(dev);
+
+    sch = g_malloc0(sizeof(SubchDev));
+
+    sch->driver_data = dev; /* back-pointer read by virtio_ccw_cb() and virtio_ccw_get_vdev() */
+    dev->sch = sch;
+
+    dev->vdev = vdev;
+    dev->indicators = 0;
+
+    /* Initialize subchannel structure. */
+    sch->channel_prog = 0x0;
+    sch->last_cmd_valid = false;
+    sch->orb = NULL;
+    /*
+     * Use a device number if provided. Otherwise, fall back to subchannel
+     * number.
+     */
+    if (dev->bus_id) {
+        num = sscanf(dev->bus_id, "%x.%x.%04x", &cssid, &ssid, &devno); /* expects "cssid.ssid.devno" in hex */
+        if (num == 3) {
+            if ((cssid > MAX_CSSID) || (ssid > MAX_SSID)) {
+                ret = -EINVAL;
+                error_report("Invalid cssid or ssid: cssid %x, ssid %x",
+                             cssid, ssid);
+                goto out_err;
+            }
+            /* Enforce use of virtual cssid. */
+            if (cssid != VIRTUAL_CSSID) {
+                ret = -EINVAL;
+                error_report("cssid %x not valid for virtio devices", cssid);
+                goto out_err;
+            }
+            if (css_devno_used(cssid, ssid, devno)) {
+                ret = -EEXIST;
+                error_report("Device %x.%x.%04x already exists", cssid, ssid,
+                             devno);
+                goto out_err;
+            }
+            sch->cssid = cssid;
+            sch->ssid = ssid;
+            sch->devno = devno;
+            have_devno = true;
+        } else {
+            ret = -EINVAL;
+            error_report("Malformed devno parameter '%s'", dev->bus_id);
+            goto out_err;
+        }
+    }
+
+    /* Find the next free id. */
+    if (have_devno) {
+        for (schid = 0; schid <= MAX_SCHID; schid++) { /* first free schid in the requested cssid/ssid */
+            if (!css_find_subch(1, cssid, ssid, schid)) {
+                sch->schid = schid;
+                css_subch_assign(cssid, ssid, schid, devno, sch);
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            ret = -ENODEV;
+            error_report("No free subchannel found for %x.%x.%04x", cssid, ssid,
+                         devno);
+            goto out_err;
+        }
+        trace_virtio_ccw_new_device(cssid, ssid, schid, devno,
+                                    "user-configured");
+    } else {
+        cssid = VIRTUAL_CSSID;
+        for (ssid = 0; ssid <= MAX_SSID; ssid++) { /* scan every subchannel set of the virtual css */
+            for (schid = 0; schid <= MAX_SCHID; schid++) {
+                if (!css_find_subch(1, cssid, ssid, schid)) {
+                    sch->cssid = cssid;
+                    sch->ssid = ssid;
+                    sch->schid = schid;
+                    devno = schid; /* default: device number equals subchannel number */
+                    /*
+                     * If the devno is already taken, look further in this
+                     * subchannel set.
+                     */
+                    while (css_devno_used(cssid, ssid, devno)) {
+                        if (devno == MAX_SCHID) {
+                            devno = 0; /* wrap around to the start of the set */
+                        } else if (devno == schid - 1) { /* back just before the starting point: every devno was tried */
+                            ret = -ENODEV;
+                            error_report("No free devno found");
+                            goto out_err;
+                        } else {
+                            devno++;
+                        }
+                    }
+                    sch->devno = devno;
+                    css_subch_assign(cssid, ssid, schid, devno, sch);
+                    found = true;
+                    break;
+                }
+            }
+            if (found) {
+                break;
+            }
+        }
+        if (!found) {
+            ret = -ENODEV;
+            error_report("Virtual channel subsystem is full!");
+            goto out_err;
+        }
+        trace_virtio_ccw_new_device(cssid, ssid, schid, devno,
+                                    "auto-configured");
+    }
+
+    /* Build initial schib. */
+    css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE);
+
+    sch->ccw_cb = virtio_ccw_cb; /* channel commands for this subchannel go to virtio_ccw_cb() */
+
+    /* Build senseid data. */
+    memset(&sch->id, 0, sizeof(SenseId));
+    sch->id.reserved = 0xff;
+    sch->id.cu_type = VIRTIO_CCW_CU_TYPE;
+    sch->id.cu_model = dev->vdev->device_id; /* the virtio device id is reported as the control unit model */
+
+    virtio_bind_device(vdev, &virtio_ccw_bindings, DEVICE(dev));
+    /* Only the first 32 feature bits are used. */
+    dev->host_features[0] = vdev->get_features(vdev, dev->host_features[0]);
+    dev->host_features[0] |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
+    dev->host_features[0] |= 0x1 << VIRTIO_F_BAD_FEATURE;
+
+    css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid,
+                          parent->hotplugged, 1);
+    return 0;
+
+out_err: /* only reached before css_subch_assign(), so the subchannel is not registered yet */
+    dev->sch = NULL;
+    g_free(sch);
+    return ret;
+}
+
+/*
+ * Common teardown for all virtio-ccw device types: unregister and free the
+ * subchannel and forget the guest's indicator addresses.
+ *
+ * Always returns 0.
+ */
+static int virtio_ccw_exit(VirtioCcwDevice *dev)
+{
+    SubchDev *sch = dev->sch;
+
+    if (sch) {
+        css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
+        /* Clear the link before freeing, as the init error path does, so
+         * no dangling pointer is left behind in the device. */
+        dev->sch = NULL;
+        g_free(sch);
+    }
+    dev->indicators = 0;
+    dev->indicators2 = 0;
+    return 0;
+}
+
+/* Create the virtio-net backend and attach it to a subchannel; -1 if the
+ * backend cannot be created. */
+static int virtio_ccw_net_init(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = virtio_net_init((DeviceState *)dev, &dev->nic,
+                                         &dev->net);
+
+    return vdev ? virtio_ccw_device_init(dev, vdev) : -1;
+}
+
+/* Tear down the virtio-net backend, then the common ccw state. */
+static int virtio_ccw_net_exit(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    virtio_net_exit(vdev);
+    return virtio_ccw_exit(dev);
+}
+
+/* Create the virtio-blk backend and attach it to a subchannel; -1 if the
+ * backend cannot be created. */
+static int virtio_ccw_blk_init(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = virtio_blk_init((DeviceState *)dev, &dev->blk);
+
+    return vdev ? virtio_ccw_device_init(dev, vdev) : -1;
+}
+
+/* Tear down the virtio-blk backend, mark its block device for automatic
+ * deletion, then release the common ccw state. */
+static int virtio_ccw_blk_exit(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    virtio_blk_exit(vdev);
+    blockdev_mark_auto_del(dev->blk.conf.bs);
+
+    return virtio_ccw_exit(dev);
+}
+
+/* Create the virtio-serial backend and attach it to a subchannel; -1 if the
+ * backend cannot be created. */
+static int virtio_ccw_serial_init(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = virtio_serial_init((DeviceState *)dev, &dev->serial);
+
+    return vdev ? virtio_ccw_device_init(dev, vdev) : -1;
+}
+
+/* Tear down the virtio-serial backend, then the common ccw state. */
+static int virtio_ccw_serial_exit(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    virtio_serial_exit(vdev);
+    return virtio_ccw_exit(dev);
+}
+
+/* Create the virtio-balloon backend and attach it to a subchannel; -1 if the
+ * backend cannot be created. */
+static int virtio_ccw_balloon_init(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = virtio_balloon_init((DeviceState *)dev);
+
+    return vdev ? virtio_ccw_device_init(dev, vdev) : -1;
+}
+
+/* Tear down the virtio-balloon backend, then the common ccw state. */
+static int virtio_ccw_balloon_exit(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    virtio_balloon_exit(vdev);
+    return virtio_ccw_exit(dev);
+}
+
+/* Create the virtio-scsi backend and attach it to a subchannel; -1 if the
+ * backend cannot be created. */
+static int virtio_ccw_scsi_init(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi);
+
+    return vdev ? virtio_ccw_device_init(dev, vdev) : -1;
+}
+
+/* Tear down the virtio-scsi backend, then the common ccw state. */
+static int virtio_ccw_scsi_exit(VirtioCcwDevice *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    virtio_scsi_exit(vdev);
+    return virtio_ccw_exit(dev);
+}
+
+/* Convert a DeviceState to its VirtioCcwDevice via container_of(). Note: used
+ * on the datapath, so be careful and test performance if you change this.
+ */
+static inline VirtioCcwDevice *to_virtio_ccw_dev_fast(DeviceState *d)
+{
+    return container_of(d, VirtioCcwDevice, parent_obj);
+}
+
+/*
+ * VirtIOBindings.notify hook: flag @vector in the guest's indicator area and
+ * raise an I/O interrupt on the device's subchannel.
+ *
+ * Vectors below VIRTIO_PCI_QUEUE_MAX set the matching bit in the word at
+ * dev->indicators; other vectors below 128 set bit 0 of the word at
+ * dev->indicators2. Vectors of 128 and above are ignored, and nothing is
+ * written or signalled while the relevant indicator address is still 0.
+ */
+static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
+{
+    VirtioCcwDevice *dev = to_virtio_ccw_dev_fast(d);
+    SubchDev *sch = dev->sch;
+    uint64_t indicators;
+
+    if (vector >= 128) {
+        return;
+    }
+
+    if (vector < VIRTIO_PCI_QUEUE_MAX) {
+        if (!dev->indicators) {
+            /* Indicators not registered: do not scribble on address 0. */
+            return;
+        }
+        indicators = ldq_phys(dev->indicators);
+        /*
+         * Plain 64-bit arithmetic instead of set_bit(): set_bit() works on
+         * unsigned long, which does not cover a uint64_t on 32-bit hosts.
+         */
+        indicators |= 1ULL << vector;
+        stq_phys(dev->indicators, indicators);
+    } else {
+        if (!dev->indicators2) {
+            /* Indicators not registered: do not scribble on address 0. */
+            return;
+        }
+        indicators = ldq_phys(dev->indicators2);
+        indicators |= 1ULL; /* bit 0 of the second indicator word */
+        stq_phys(dev->indicators2, indicators);
+    }
+
+    css_conditional_io_interrupt(sch);
+}
+
+static unsigned virtio_ccw_get_features(DeviceState *d)
+{
+    VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
+
+    /* Only the first 32 feature bits are used. */
+    return dev->host_features[0];
+}
+
+static void virtio_ccw_reset(DeviceState *d)
+{
+    VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
+
+    virtio_reset(dev->vdev);
+    css_reset_sch(dev->sch);
+}
+
+/**************** Virtio-ccw Bus Device Descriptions *******************/
+
+static const VirtIOBindings virtio_ccw_bindings = {
+    .notify = virtio_ccw_notify,
+    .get_features = virtio_ccw_get_features,
+};
+
+static Property virtio_ccw_net_properties[] = {
+    DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+    DEFINE_VIRTIO_NET_FEATURES(VirtioCcwDevice, host_features[0]),
+    DEFINE_NIC_PROPERTIES(VirtioCcwDevice, nic),
+    DEFINE_PROP_UINT32("x-txtimer", VirtioCcwDevice,
+                       net.txtimer, TX_TIMER_INTERVAL),
+    DEFINE_PROP_INT32("x-txburst", VirtioCcwDevice,
+                      net.txburst, TX_BURST),
+    DEFINE_PROP_STRING("tx", VirtioCcwDevice, net.tx),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_net_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->init = virtio_ccw_net_init;
+    k->exit = virtio_ccw_net_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_net_properties;
+}
+
+static const TypeInfo virtio_ccw_net = {
+    .name          = "virtio-net-ccw",
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init    = virtio_ccw_net_class_init,
+};
+
+static Property virtio_ccw_blk_properties[] = {
+    DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+    DEFINE_BLOCK_PROPERTIES(VirtioCcwDevice, blk.conf),
+    DEFINE_PROP_STRING("serial", VirtioCcwDevice, blk.serial),
+#ifdef __linux__
+    DEFINE_PROP_BIT("scsi", VirtioCcwDevice, blk.scsi, 0, true),
+#endif
+    DEFINE_VIRTIO_BLK_FEATURES(VirtioCcwDevice, host_features[0]),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_blk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->init = virtio_ccw_blk_init;
+    k->exit = virtio_ccw_blk_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_blk_properties;
+}
+
+static const TypeInfo virtio_ccw_blk = {
+    .name          = "virtio-blk-ccw",
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init    = virtio_ccw_blk_class_init,
+};
+
+static Property virtio_ccw_serial_properties[] = {
+    DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+    DEFINE_PROP_UINT32("max_ports", VirtioCcwDevice,
+                       serial.max_virtserial_ports, 31),
+    DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_serial_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->init = virtio_ccw_serial_init;
+    k->exit = virtio_ccw_serial_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_serial_properties;
+}
+
+static const TypeInfo virtio_ccw_serial = {
+    .name          = "virtio-serial-ccw",
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init    = virtio_ccw_serial_class_init,
+};
+
+static Property virtio_ccw_balloon_properties[] = {
+    DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+    DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_balloon_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->init = virtio_ccw_balloon_init;
+    k->exit = virtio_ccw_balloon_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_balloon_properties;
+}
+
+static const TypeInfo virtio_ccw_balloon = {
+    .name          = "virtio-balloon-ccw",
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init    = virtio_ccw_balloon_class_init,
+};
+
+static Property virtio_ccw_scsi_properties[] = {
+    DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+    DEFINE_VIRTIO_SCSI_PROPERTIES(VirtioCcwDevice, host_features[0], scsi),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_scsi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+    k->init = virtio_ccw_scsi_init;
+    k->exit = virtio_ccw_scsi_exit;
+    dc->reset = virtio_ccw_reset;
+    dc->props = virtio_ccw_scsi_properties;
+}
+
+static const TypeInfo virtio_ccw_scsi = {
+    .name          = "virtio-scsi-ccw",
+    .parent        = TYPE_VIRTIO_CCW_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init    = virtio_ccw_scsi_class_init,
+};
+
+static int virtio_ccw_busdev_init(DeviceState *dev)
+{
+    VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+    VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
+
+    virtio_ccw_bus_new(&_dev->bus, _dev);
+
+    return _info->init(_dev);
+}
+
+static int virtio_ccw_busdev_exit(DeviceState *dev)
+{
+    VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+    VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
+
+    return _info->exit(_dev);
+}
+
+static int virtio_ccw_busdev_unplug(DeviceState *dev)
+{
+    VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+    SubchDev *sch = _dev->sch;
+
+    /*
+     * We should arrive here only for device_del, since we don't support
+     * direct hot(un)plug of channels, but only through virtio.
+     */
+    assert(sch != NULL);
+    /* Subchannel is now disabled and no longer valid. */
+    sch->curr_status.pmcw.flags &= ~(PMCW_FLAGS_MASK_ENA |
+                                     PMCW_FLAGS_MASK_DNV);
+
+    css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, 1, 0);
+
+    object_unparent(OBJECT(dev));
+    qdev_free(dev);
+    return 0;
+}
+
+static void virtio_ccw_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->init = virtio_ccw_busdev_init;
+    dc->exit = virtio_ccw_busdev_exit;
+    dc->unplug = virtio_ccw_busdev_unplug;
+    dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
+
+}
+
+static const TypeInfo virtio_ccw_device_info = {
+    .name = TYPE_VIRTIO_CCW_DEVICE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(VirtioCcwDevice),
+    .class_init = virtio_ccw_device_class_init,
+    .class_size = sizeof(VirtIOCCWDeviceClass),
+    .abstract = true,
+};
+
+/***************** Virtual-css Bus Bridge Device ********************/
+/* Only required to have the virtio bus as child in the system bus */
+
+static int virtual_css_bridge_init(SysBusDevice *dev)
+{
+    /* nothing */
+    return 0;
+}
+
+static void virtual_css_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = virtual_css_bridge_init;
+    dc->no_user = 1;
+}
+
+static const TypeInfo virtual_css_bridge_info = {
+    .name          = "virtual-css-bridge",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SysBusDevice),
+    .class_init    = virtual_css_bridge_class_init,
+};
+
+/* virtio-ccw-bus */
+
+void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev)
+{
+    DeviceState *qdev = DEVICE(dev);
+    BusState *qbus;
+
+    qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_CCW_BUS, qdev, NULL);
+    qbus = BUS(bus);
+    qbus->allow_hotplug = 0;
+}
+
+static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data)
+{
+    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
+    BusClass *bus_class = BUS_CLASS(klass);
+
+    bus_class->max_dev = 1;
+    k->notify = virtio_ccw_notify;
+    k->get_features = virtio_ccw_get_features;
+}
+
+static const TypeInfo virtio_ccw_bus_info = {
+    .name = TYPE_VIRTIO_CCW_BUS,
+    .parent = TYPE_VIRTIO_BUS,
+    .instance_size = sizeof(VirtioCcwBusState),
+    .class_init = virtio_ccw_bus_class_init,
+};
+
+static void virtio_ccw_register(void)
+{
+    type_register_static(&virtio_ccw_bus_info);
+    type_register_static(&virtual_css_bus_info);
+    type_register_static(&virtio_ccw_device_info);
+    type_register_static(&virtio_ccw_serial);
+    type_register_static(&virtio_ccw_blk);
+    type_register_static(&virtio_ccw_net);
+    type_register_static(&virtio_ccw_balloon);
+    type_register_static(&virtio_ccw_scsi);
+    type_register_static(&virtual_css_bridge_info);
+}
+
+type_init(virtio_ccw_register)
diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
new file mode 100644
index 0000000..48474b3
--- /dev/null
+++ b/hw/s390x/virtio-ccw.h
@@ -0,0 +1,98 @@
+/*
+ * virtio ccw target definitions
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_VIRTIO_CCW_H
+#define HW_S390X_VIRTIO_CCW_H
+
+#include <hw/virtio-blk.h>
+#include <hw/virtio-net.h>
+#include <hw/virtio-serial.h>
+#include <hw/virtio-scsi.h>
+#include <hw/virtio-bus.h>
+
+#define VIRTUAL_CSSID 0xfe
+
+#define VIRTIO_CCW_CU_TYPE 0x3832
+#define VIRTIO_CCW_CHPID_TYPE 0x32
+
+#define CCW_CMD_SET_VQ       0x13
+#define CCW_CMD_VDEV_RESET   0x33
+#define CCW_CMD_READ_FEAT    0x12
+#define CCW_CMD_WRITE_FEAT   0x11
+#define CCW_CMD_READ_CONF    0x22
+#define CCW_CMD_WRITE_CONF   0x21
+#define CCW_CMD_WRITE_STATUS 0x31
+#define CCW_CMD_SET_IND      0x43
+#define CCW_CMD_SET_CONF_IND 0x53
+#define CCW_CMD_READ_VQ_CONF 0x32
+
+#define TYPE_VIRTIO_CCW_DEVICE "virtio-ccw-device"
+#define VIRTIO_CCW_DEVICE(obj) \
+     OBJECT_CHECK(VirtioCcwDevice, (obj), TYPE_VIRTIO_CCW_DEVICE)
+#define VIRTIO_CCW_DEVICE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(VirtIOCCWDeviceClass, (klass), TYPE_VIRTIO_CCW_DEVICE)
+#define VIRTIO_CCW_DEVICE_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(VirtIOCCWDeviceClass, (obj), TYPE_VIRTIO_CCW_DEVICE)
+
+typedef struct VirtioBusState VirtioCcwBusState;
+typedef struct VirtioBusClass VirtioCcwBusClass;
+
+#define TYPE_VIRTIO_CCW_BUS "virtio-ccw-bus" /* QOM casts follow */
+#define VIRTIO_CCW_BUS(obj) \
+     OBJECT_CHECK(VirtioCcwBusState, (obj), TYPE_VIRTIO_CCW_BUS)
+#define VIRTIO_CCW_BUS_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(VirtioCcwBusClass, (obj), TYPE_VIRTIO_CCW_BUS)
+#define VIRTIO_CCW_BUS_CLASS(klass) \
+    OBJECT_CLASS_CHECK(VirtioCcwBusClass, (klass), TYPE_VIRTIO_CCW_BUS)
+
+typedef struct VirtioCcwDevice VirtioCcwDevice;
+
+void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev);
+
+typedef struct VirtIOCCWDeviceClass {
+    DeviceClass parent_class;
+    int (*init)(VirtioCcwDevice *dev);
+    int (*exit)(VirtioCcwDevice *dev);
+} VirtIOCCWDeviceClass;
+
+/* Change here if we want to support more feature bits. */
+#define VIRTIO_CCW_FEATURE_SIZE 1
+
+struct VirtioCcwDevice {
+    DeviceState parent_obj;
+    SubchDev *sch;
+    VirtIODevice *vdev;
+    char *bus_id;
+    VirtIOBlkConf blk;
+    NICConf nic;
+    uint32_t host_features[VIRTIO_CCW_FEATURE_SIZE];
+    virtio_serial_conf serial;
+    virtio_net_conf net;
+    VirtIOSCSIConf scsi;
+    VirtioBusState bus;
+    /* Guest provided values: */
+    hwaddr indicators;
+    hwaddr indicators2;
+};
+
+/* virtual css bus type */
+typedef struct VirtualCssBus {
+    BusState parent_obj;
+} VirtualCssBus;
+
+#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus"
+#define VIRTUAL_CSS_BUS(obj) \
+     OBJECT_CHECK(VirtualCssBus, (obj), TYPE_VIRTUAL_CSS_BUS)
+
+VirtualCssBus *virtual_css_bus_init(void);
+void virtio_ccw_device_update_status(SubchDev *sch);
+VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch);
+#endif
diff --git a/trace-events b/trace-events
index 71a1111..1011f27 100644
--- a/trace-events
+++ b/trace-events
@@ -1086,3 +1086,7 @@ css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type) "CSS: add chpid %x.%02
 css_new_image(uint8_t cssid, const char *default_cssid) "CSS: add css image %02x %s"
 css_assign_subch(const char *do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno) "CSS: %s %x.%x.%04x (devno %04x)"
 css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char *conditional) "CSS: I/O interrupt on sch %x.%x.%04x (intparm %08x, isc %x) %s"
+
+# hw/s390x/virtio-ccw.c
+virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command %x"
+virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno %04x (%s)"
commit fad37673f53ac8a2e0575c084a26e5f6bb59957d
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:09 2013 +0000

    s390-virtio: Factor out some initialization code.
    
    Some of the machine initialization for s390-virtio will be reused
    by virtio-ccw.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 5edaabb..6e0f53b 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -147,13 +147,73 @@ unsigned s390_del_running_cpu(CPUS390XState *env)
     return s390_running_cpus;
 }
 
+void s390_init_ipl_dev(const char *kernel_filename,
+                       const char *kernel_cmdline,
+                       const char *initrd_filename)
+{
+    DeviceState *dev;
+
+    dev  = qdev_create(NULL, "s390-ipl");
+    if (kernel_filename) {
+        qdev_prop_set_string(dev, "kernel", kernel_filename);
+    }
+    if (initrd_filename) {
+        qdev_prop_set_string(dev, "initrd", initrd_filename);
+    }
+    qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
+    qdev_init_nofail(dev);
+}
+
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys)
+{
+    int i;
+
+    if (cpu_model == NULL) {
+        cpu_model = "host";
+    }
+
+    ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
+
+    for (i = 0; i < smp_cpus; i++) {
+        S390CPU *cpu;
+
+        cpu = cpu_s390x_init(cpu_model);
+
+        ipi_states[i] = cpu;
+        cpu->env.halted = 1;
+        cpu->env.exception_index = EXCP_HLT;
+        cpu->env.storage_keys = storage_keys;
+    }
+}
+
+
+void s390_create_virtio_net(BusState *bus, const char *name)
+{
+    int i;
+
+    for (i = 0; i < nb_nics; i++) {
+        NICInfo *nd = &nd_table[i];
+        DeviceState *dev;
+
+        if (!nd->model) {
+            nd->model = g_strdup("virtio");
+        }
+
+        if (strcmp(nd->model, "virtio")) {
+            fprintf(stderr, "S390 only supports VirtIO nics\n");
+            exit(1);
+        }
+
+        dev = qdev_create(bus, name);
+        qdev_set_nic_properties(dev, nd);
+        qdev_init_nofail(dev);
+    }
+}
+
 /* PC hardware initialisation */
 static void s390_init(QEMUMachineInitArgs *args)
 {
     ram_addr_t my_ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    CPUS390XState *env = NULL;
-    DeviceState *dev;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     int shift = 0;
@@ -161,7 +221,6 @@ static void s390_init(QEMUMachineInitArgs *args)
     void *virtio_region;
     hwaddr virtio_region_len;
     hwaddr virtio_region_start;
-    int i;
 
     /* s390x ram size detection needs a 16bit multiplier + an increment. So
        guests > 64GB can be specified in 2MB steps etc. */
@@ -176,15 +235,8 @@ static void s390_init(QEMUMachineInitArgs *args)
     /* get a BUS */
     s390_bus = s390_virtio_bus_init(&my_ram_size);
     s390_sclp_init();
-    dev  = qdev_create(NULL, "s390-ipl");
-    if (args->kernel_filename) {
-        qdev_prop_set_string(dev, "kernel", args->kernel_filename);
-    }
-    if (args->initrd_filename) {
-        qdev_prop_set_string(dev, "initrd", args->initrd_filename);
-    }
-    qdev_prop_set_string(dev, "cmdline", args->kernel_cmdline);
-    qdev_init_nofail(dev);
+    s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
+                      args->initrd_filename);
 
     /* register hypercalls */
     s390_virtio_register_hcalls();
@@ -207,46 +259,10 @@ static void s390_init(QEMUMachineInitArgs *args)
     storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
 
     /* init CPUs */
-    if (cpu_model == NULL) {
-        cpu_model = "host";
-    }
-
-    ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
-
-    for (i = 0; i < smp_cpus; i++) {
-        S390CPU *cpu;
-        CPUS390XState *tmp_env;
-
-        cpu = cpu_s390x_init(cpu_model);
-        tmp_env = &cpu->env;
-        if (!env) {
-            env = tmp_env;
-        }
-        ipi_states[i] = cpu;
-        tmp_env->halted = 1;
-        tmp_env->exception_index = EXCP_HLT;
-        tmp_env->storage_keys = storage_keys;
-    }
-
+    s390_init_cpus(args->cpu_model, storage_keys);
 
     /* Create VirtIO network adapters */
-    for(i = 0; i < nb_nics; i++) {
-        NICInfo *nd = &nd_table[i];
-        DeviceState *dev;
-
-        if (!nd->model) {
-            nd->model = g_strdup("virtio");
-        }
-
-        if (strcmp(nd->model, "virtio")) {
-            fprintf(stderr, "S390 only supports VirtIO nics\n");
-            exit(1);
-        }
-
-        dev = qdev_create((BusState *)s390_bus, "virtio-net-s390");
-        qdev_set_nic_properties(dev, nd);
-        qdev_init_nofail(dev);
-    }
+    s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390");
 }
 
 static QEMUMachine s390_machine = {
diff --git a/hw/s390-virtio.h b/hw/s390-virtio.h
index 25bb610..67bfd20 100644
--- a/hw/s390-virtio.h
+++ b/hw/s390-virtio.h
@@ -19,4 +19,9 @@
 typedef int (*s390_virtio_fn)(const uint64_t *args);
 void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
 
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys);
+void s390_init_ipl_dev(const char *kernel_filename,
+                       const char *kernel_cmdline,
+                       const char *initrd_filename);
+void s390_create_virtio_net(BusState *bus, const char *name);
 #endif
commit 09b998782978f95f626236b39f0be99c02a014a9
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:07 2013 +0000

    s390: Wire up channel I/O in kvm.
    
    Trigger the code for our virtual css in case of instruction
    intercepts for I/O instructions.
    
    Handle the tsch exit for the subchannel-related part of tsch.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 778065c..ce12fa4 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -1058,6 +1058,13 @@ void QEMU_NORETURN runtime_exception(CPUS390XState *env, int excp,
 
 #include <sysemu/kvm.h>
 
+#ifdef CONFIG_KVM
+void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id,
+                           uint16_t subchannel_nr, uint32_t io_int_parm,
+                           uint32_t io_int_word);
+void kvm_s390_crw_mchk(S390CPU *cpu);
+void kvm_s390_enable_css_support(S390CPU *cpu);
+#else
 static inline void kvm_s390_io_interrupt(S390CPU *cpu,
                                         uint16_t subchannel_id,
                                         uint16_t subchannel_nr,
@@ -1068,6 +1075,10 @@ static inline void kvm_s390_io_interrupt(S390CPU *cpu,
 static inline void kvm_s390_crw_mchk(S390CPU *cpu)
 {
 }
+static inline void kvm_s390_enable_css_support(S390CPU *cpu)
+{
+}
+#endif
 
 static inline void s390_io_interrupt(S390CPU *cpu,
                                      uint16_t subchannel_id,
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 99deddf..2c24182 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -47,9 +47,29 @@
 
 #define IPA0_DIAG                       0x8300
 #define IPA0_SIGP                       0xae00
-#define IPA0_PRIV                       0xb200
+#define IPA0_B2                         0xb200
+#define IPA0_B9                         0xb900
+#define IPA0_EB                         0xeb00
 
 #define PRIV_SCLP_CALL                  0x20
+#define PRIV_CSCH                       0x30
+#define PRIV_HSCH                       0x31
+#define PRIV_MSCH                       0x32
+#define PRIV_SSCH                       0x33
+#define PRIV_STSCH                      0x34
+#define PRIV_TSCH                       0x35
+#define PRIV_TPI                        0x36
+#define PRIV_SAL                        0x37
+#define PRIV_RSCH                       0x38
+#define PRIV_STCRW                      0x39
+#define PRIV_STCPS                      0x3a
+#define PRIV_RCHP                       0x3b
+#define PRIV_SCHM                       0x3c
+#define PRIV_CHSC                       0x5f
+#define PRIV_SIGA                       0x74
+#define PRIV_XSCH                       0x76
+#define PRIV_SQBS                       0x8a
+#define PRIV_EQBS                       0x9c
 #define DIAG_KVM_HYPERCALL              0x500
 #define DIAG_KVM_BREAKPOINT             0x501
 
@@ -380,10 +400,123 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run,
     return 0;
 }
 
-static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
+static int kvm_handle_css_inst(S390CPU *cpu, struct kvm_run *run,
+                               uint8_t ipa0, uint8_t ipa1, uint8_t ipb)
+{
+    int r = 0;
+    int no_cc = 0;
+    CPUS390XState *env = &cpu->env;
+
+    if (ipa0 != 0xb2) {
+        /* Not handled for now. */
+        return -1;
+    }
+    cpu_synchronize_state(env);
+    switch (ipa1) {
+    case PRIV_XSCH:
+        r = ioinst_handle_xsch(env, env->regs[1]);
+        break;
+    case PRIV_CSCH:
+        r = ioinst_handle_csch(env, env->regs[1]);
+        break;
+    case PRIV_HSCH:
+        r = ioinst_handle_hsch(env, env->regs[1]);
+        break;
+    case PRIV_MSCH:
+        r = ioinst_handle_msch(env, env->regs[1], run->s390_sieic.ipb);
+        break;
+    case PRIV_SSCH:
+        r = ioinst_handle_ssch(env, env->regs[1], run->s390_sieic.ipb);
+        break;
+    case PRIV_STCRW:
+        r = ioinst_handle_stcrw(env, run->s390_sieic.ipb);
+        break;
+    case PRIV_STSCH:
+        r = ioinst_handle_stsch(env, env->regs[1], run->s390_sieic.ipb);
+        break;
+    case PRIV_TSCH:
+        /* We should only get tsch via KVM_EXIT_S390_TSCH. */
+        fprintf(stderr, "Spurious tsch intercept\n");
+        break;
+    case PRIV_CHSC:
+        r = ioinst_handle_chsc(env, run->s390_sieic.ipb);
+        break;
+    case PRIV_TPI:
+        /* This should have been handled by kvm already. */
+        fprintf(stderr, "Spurious tpi intercept\n");
+        break;
+    case PRIV_SCHM:
+        no_cc = 1;
+        r = ioinst_handle_schm(env, env->regs[1], env->regs[2],
+                               run->s390_sieic.ipb);
+        break;
+    case PRIV_RSCH:
+        r = ioinst_handle_rsch(env, env->regs[1]);
+        break;
+    case PRIV_RCHP:
+        r = ioinst_handle_rchp(env, env->regs[1]);
+        break;
+    case PRIV_STCPS:
+        /* We do not provide this instruction, it is suppressed. */
+        no_cc = 1;
+        r = 0;
+        break;
+    case PRIV_SAL:
+        no_cc = 1;
+        r = ioinst_handle_sal(env, env->regs[1]);
+        break;
+    default:
+        r = -1;
+        break;
+    }
+
+    if (r >= 0) {
+        if (!no_cc) {
+            setcc(cpu, r);
+        }
+        r = 0;
+    } else if (r < -1) {
+        r = 0;
+    }
+    return r;
+}
+
+static int is_ioinst(uint8_t ipa0, uint8_t ipa1, uint8_t ipb)
+{
+    int ret = 0;
+    uint16_t ipa = (ipa0 << 8) | ipa1;
+
+    switch (ipa) {
+    case IPA0_B2 | PRIV_CSCH:
+    case IPA0_B2 | PRIV_HSCH:
+    case IPA0_B2 | PRIV_MSCH:
+    case IPA0_B2 | PRIV_SSCH:
+    case IPA0_B2 | PRIV_STSCH:
+    case IPA0_B2 | PRIV_TPI:
+    case IPA0_B2 | PRIV_SAL:
+    case IPA0_B2 | PRIV_RSCH:
+    case IPA0_B2 | PRIV_STCRW:
+    case IPA0_B2 | PRIV_STCPS:
+    case IPA0_B2 | PRIV_RCHP:
+    case IPA0_B2 | PRIV_SCHM:
+    case IPA0_B2 | PRIV_CHSC:
+    case IPA0_B2 | PRIV_SIGA:
+    case IPA0_B2 | PRIV_XSCH:
+    case IPA0_B9 | PRIV_EQBS:
+    case IPA0_EB | PRIV_SQBS:
+        ret = 1;
+        break;
+    }
+
+    return ret;
+}
+
+static int handle_priv(S390CPU *cpu, struct kvm_run *run,
+                       uint8_t ipa0, uint8_t ipa1)
 {
     int r = 0;
     uint16_t ipbh0 = (run->s390_sieic.ipb & 0xffff0000) >> 16;
+    uint8_t ipb = run->s390_sieic.ipb & 0xff;
 
     dprintf("KVM: PRIV: %d\n", ipa1);
     switch (ipa1) {
@@ -391,8 +524,16 @@ static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
             r = kvm_sclp_service_call(cpu, run, ipbh0);
             break;
         default:
-            dprintf("KVM: unknown PRIV: 0x%x\n", ipa1);
-            r = -1;
+            if (is_ioinst(ipa0, ipa1, ipb)) {
+                r = kvm_handle_css_inst(cpu, run, ipa0, ipa1, ipb);
+                if (r == -1) {
+                    setcc(cpu, 3);
+                    r = 0;
+                }
+            } else {
+                dprintf("KVM: unknown PRIV: 0x%x\n", ipa1);
+                r = -1;
+            }
             break;
     }
 
@@ -533,15 +674,17 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
 
     dprintf("handle_instruction 0x%x 0x%x\n", run->s390_sieic.ipa, run->s390_sieic.ipb);
     switch (ipa0) {
-        case IPA0_PRIV:
-            r = handle_priv(cpu, run, ipa1);
-            break;
-        case IPA0_DIAG:
-            r = handle_diag(env, run, ipb_code);
-            break;
-        case IPA0_SIGP:
-            r = handle_sigp(cpu, run, ipa1);
-            break;
+    case IPA0_B2:
+    case IPA0_B9:
+    case IPA0_EB:
+        r = handle_priv(cpu, run, ipa0 >> 8, ipa1);
+        break;
+    case IPA0_DIAG:
+        r = handle_diag(env, run, ipb_code);
+        break;
+    case IPA0_SIGP:
+        r = handle_sigp(cpu, run, ipa1);
+        break;
     }
 
     if (r < 0) {
@@ -600,6 +743,43 @@ static int handle_intercept(S390CPU *cpu)
     return r;
 }
 
+static int handle_tsch(S390CPU *cpu)
+{
+    CPUS390XState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+    struct kvm_run *run = cs->kvm_run;
+    int ret;
+
+    cpu_synchronize_state(env);
+    ret = ioinst_handle_tsch(env, env->regs[1], run->s390_tsch.ipb);
+    if (ret >= 0) {
+        /* Success; set condition code. */
+        setcc(cpu, ret);
+        ret = 0;
+    } else if (ret < -1) {
+        /*
+         * Failure.
+         * If an I/O interrupt had been dequeued, we have to reinject it.
+         */
+        if (run->s390_tsch.dequeued) {
+            uint16_t subchannel_id = run->s390_tsch.subchannel_id;
+            uint16_t subchannel_nr = run->s390_tsch.subchannel_nr;
+            uint32_t io_int_parm = run->s390_tsch.io_int_parm;
+            uint32_t io_int_word = run->s390_tsch.io_int_word;
+            uint32_t type = ((subchannel_id & 0xff00) << 24) |
+                ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16);
+
+            kvm_s390_interrupt_internal(cpu, type,
+                                        ((uint32_t)subchannel_id << 16)
+                                        | subchannel_nr,
+                                        ((uint64_t)io_int_parm << 32)
+                                        | io_int_word, 1);
+        }
+        ret = 0;
+    }
+    return ret;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     S390CPU *cpu = S390_CPU(cs);
@@ -612,6 +792,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         case KVM_EXIT_S390_RESET:
             qemu_system_reset_request();
             break;
+        case KVM_EXIT_S390_TSCH:
+            ret = handle_tsch(cpu);
+            break;
         default:
             fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason);
             break;
@@ -637,3 +820,33 @@ int kvm_arch_on_sigbus(int code, void *addr)
 {
     return 1;
 }
+
+void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id,
+                           uint16_t subchannel_nr, uint32_t io_int_parm,
+                           uint32_t io_int_word)
+{
+    uint32_t type;
+
+    type = ((subchannel_id & 0xff00) << 24) |
+        ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16);
+    kvm_s390_interrupt_internal(cpu, type,
+                                ((uint32_t)subchannel_id << 16) | subchannel_nr,
+                                ((uint64_t)io_int_parm << 32) | io_int_word, 1);
+}
+
+void kvm_s390_crw_mchk(S390CPU *cpu)
+{
+    kvm_s390_interrupt_internal(cpu, KVM_S390_MCHK, 1 << 28,
+                                0x00400f1d40330000, 1);
+}
+
+void kvm_s390_enable_css_support(S390CPU *cpu)
+{
+    struct kvm_enable_cap cap = {};
+    int r;
+
+    /* Activate host kernel channel subsystem support. */
+    cap.cap = KVM_CAP_S390_CSS_SUPPORT;
+    r = kvm_vcpu_ioctl(CPU(cpu), KVM_ENABLE_CAP, &cap);
+    assert(r == 0);
+}
commit df1fe5bb49241baddf1f319a6ecbe0885e875afa
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:06 2013 +0000

    s390: Virtual channel subsystem support.
    
    Provide a mechanism for qemu to provide fully virtual subchannels to
    the guest.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 1b40c2e..ab99da6 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -6,3 +6,4 @@ obj-y += sclp.o
 obj-y += event-facility.o
 obj-y += sclpquiesce.o sclpconsole.o
 obj-y += ipl.o
+obj-y += css.o
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
new file mode 100644
index 0000000..113ac9a
--- /dev/null
+++ b/hw/s390x/css.c
@@ -0,0 +1,1277 @@
+/*
+ * Channel subsystem base support.
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <hw/qdev.h>
+#include "qemu/bitops.h"
+#include "cpu.h"
+#include "ioinst.h"
+#include "css.h"
+#include "trace.h"
+
+typedef struct CrwContainer { /* One queued channel report word (CRW). */
+    CRW crw; /* payload copied to the guest by css_do_stcrw() */
+    QTAILQ_ENTRY(CrwContainer) sibling; /* linkage in pending_crws */
+} CrwContainer;
+
+typedef struct ChpInfo { /* Per-chpid state kept in a CssImage. */
+    uint8_t in_use; /* set to 1 once the chpid has been added */
+    uint8_t type; /* type given at add time; reported by css_collect_chp_desc() */
+    uint8_t is_virtual; /* set to 1 by css_add_virtual_chpid(); needed for rchp */
+} ChpInfo;
+
+typedef struct SubchSet { /* One subchannel set: devices plus usage bitmaps. */
+    SubchDev *sch[MAX_SCHID + 1]; /* indexed by schid; written by css_subch_assign() */
+    unsigned long schids_used[BITS_TO_LONGS(MAX_SCHID + 1)]; /* one bit per schid */
+    unsigned long devnos_used[BITS_TO_LONGS(MAX_SCHID + 1)]; /* one bit per devno */
+} SubchSet;
+
+typedef struct CssImage { /* One channel subsystem image. */
+    SubchSet *sch_set[MAX_SSID + 1]; /* indexed by ssid; allocated on first assign */
+    ChpInfo chpids[MAX_CHPID + 1]; /* indexed by chpid */
+} CssImage;
+
+typedef struct ChannelSubSys { /* Global channel subsystem state. */
+    QTAILQ_HEAD(, CrwContainer) pending_crws; /* filled by css_queue_crw() */
+    bool do_crw_mchk; /* when true, the next queued CRW injects a machine check */
+    bool crws_lost; /* a CRW allocation failed; flagged on the next queued CRW */
+    uint8_t max_cssid; /* > 0 makes non-default css images visible */
+    uint8_t max_ssid; /* highest ssid that css_subch_visible() accepts */
+    bool chnmon_active; /* channel monitoring switched on by css_do_schm() */
+    uint64_t chnmon_area; /* base address used for format-0 measurements */
+    CssImage *css[MAX_CSSID + 1]; /* indexed by cssid; set by css_create_css_image() */
+    uint8_t default_cssid; /* image used when cssid 0 is passed with m == 0 */
+} ChannelSubSys;
+
+static ChannelSubSys *channel_subsys; /* initialization is not visible in this chunk */
+
+int css_create_css_image(uint8_t cssid, bool default_image)
+{ /* Create css image cssid; returns 0, -EINVAL or -EBUSY. */
+    trace_css_new_image(cssid, default_image ? "(default)" : "");
+    if (cssid > MAX_CSSID) {
+        return -EINVAL; /* cssid out of range */
+    }
+    if (channel_subsys->css[cssid]) {
+        return -EBUSY; /* an image with this cssid already exists */
+    }
+    channel_subsys->css[cssid] = g_malloc0(sizeof(CssImage));
+    if (default_image) {
+        channel_subsys->default_cssid = cssid; /* remember it as the default */
+    }
+    return 0;
+}
+
+static uint16_t css_build_subchannel_id(SubchDev *sch)
+{
+    /* Bit 0 is always set; cssid and bit 3 are added when max_cssid > 0. */
+    uint16_t id = (sch->ssid << 1) | 1;
+
+    return channel_subsys->max_cssid > 0 ? (sch->cssid << 8) | (1 << 3) | id : id;
+}
+
+static void css_inject_io_interrupt(SubchDev *sch)
+{ /* Trace and inject an I/O interrupt for sch on the cpu at address 0. */
+    S390CPU *cpu = s390_cpu_addr2state(0);
+    uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11;
+    /* The last argument uses 0x80u: 0x80 << 24 overflows int when isc is 0. */
+    trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid,
+                           sch->curr_status.pmcw.intparm, isc, "");
+    s390_io_interrupt(cpu,
+                      css_build_subchannel_id(sch),
+                      sch->schid,
+                      sch->curr_status.pmcw.intparm,
+                      (0x80u >> isc) << 24);
+}
+
+void css_conditional_io_interrupt(SubchDev *sch)
+{ /* Raise an unsolicited alert interrupt unless status is already pending. */
+    /*
+     * If the subchannel is not currently status pending, make it pending
+     * with alert status.
+     */
+    if (!(sch->curr_status.scsw.ctrl & SCSW_STCTL_STATUS_PEND)) {
+        S390CPU *cpu = s390_cpu_addr2state(0);
+        uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11;
+
+        trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid,
+                               sch->curr_status.pmcw.intparm, isc,
+                               "(unsolicited)");
+        sch->curr_status.scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
+        sch->curr_status.scsw.ctrl |=
+            SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+        /* Inject an I/O interrupt. */
+        s390_io_interrupt(cpu,
+                          css_build_subchannel_id(sch),
+                          sch->schid,
+                          sch->curr_status.pmcw.intparm,
+                          (0x80u >> isc) << 24); /* unsigned: 0x80 << 24 overflows int */
+    }
+}
+
+static void sch_handle_clear_func(SubchDev *sch)
+{ /* Perform the clear function on sch and leave it status pending. */
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    int path;
+
+    /* Path management: In our simple css, we always choose the only path. */
+    path = 0x80;
+
+    /* Reset values prior to 'issuing the clear signal'. */
+    p->lpum = 0;
+    p->pom = 0xff;
+    s->flags &= ~SCSW_FLAGS_MASK_PNO;
+
+    /* We always 'attempt to issue the clear signal', and we always succeed. */
+    sch->orb = NULL; /* drop any channel program state */
+    sch->channel_prog = 0x0;
+    sch->last_cmd_valid = false;
+    s->ctrl &= ~SCSW_ACTL_CLEAR_PEND; /* the clear is no longer pending ... */
+    s->ctrl |= SCSW_STCTL_STATUS_PEND; /* ... its status is */
+
+    s->dstat = 0;
+    s->cstat = 0;
+    p->lpum = path; /* record the path that was used */
+
+}
+
+static void sch_handle_halt_func(SubchDev *sch)
+{ /* Perform the halt function on sch and leave it status pending. */
+
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    int path;
+
+    /* Path management: In our simple css, we always choose the only path. */
+    path = 0x80;
+
+    /* We always 'attempt to issue the halt signal', and we always succeed. */
+    sch->orb = NULL; /* drop any channel program state */
+    sch->channel_prog = 0x0;
+    sch->last_cmd_valid = false;
+    s->ctrl &= ~SCSW_ACTL_HALT_PEND; /* the halt is no longer pending ... */
+    s->ctrl |= SCSW_STCTL_STATUS_PEND; /* ... its status is */
+
+    if ((s->ctrl & (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) ||
+        !((s->ctrl & SCSW_ACTL_START_PEND) ||
+          (s->ctrl & SCSW_ACTL_SUSP))) { /* active, or neither start pending nor suspended */
+        s->dstat = SCSW_DSTAT_DEVICE_END;
+    }
+    s->cstat = 0;
+    p->lpum = path; /* record the path that was used */
+
+}
+
+static void copy_sense_id_to_guest(SenseId *dest, SenseId *src)
+{ /* Copy src into dest, converting multi-byte fields to big endian. */
+    int i;
+
+    dest->reserved = src->reserved;
+    dest->cu_type = cpu_to_be16(src->cu_type);
+    dest->cu_model = src->cu_model; /* single-byte fields are copied unchanged */
+    dest->dev_type = cpu_to_be16(src->dev_type);
+    dest->dev_model = src->dev_model;
+    dest->unused = src->unused;
+    for (i = 0; i < ARRAY_SIZE(dest->ciw); i++) { /* every ciw entry */
+        dest->ciw[i].type = src->ciw[i].type;
+        dest->ciw[i].command = src->ciw[i].command;
+        dest->ciw[i].count = cpu_to_be16(src->ciw[i].count);
+    }
+}
+
+static CCW1 copy_ccw_from_guest(hwaddr addr)
+{ /* Read one CCW1 from guest physical memory and return it in host order. */
+    CCW1 tmp; /* raw big-endian copy as stored by the guest */
+    CCW1 ret;
+
+    cpu_physical_memory_read(addr, &tmp, sizeof(tmp));
+    ret.cmd_code = tmp.cmd_code;
+    ret.flags = tmp.flags;
+    ret.count = be16_to_cpu(tmp.count);
+    ret.cda = be32_to_cpu(tmp.cda);
+
+    return ret;
+}
+
+static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
+{ /* Run one ccw; 0 = done, -EAGAIN = continue at channel_prog, else error. */
+    int ret;
+    bool check_len;
+    int len;
+    CCW1 ccw;
+
+    if (!ccw_addr) {
+        return -EIO; /* address 0 is rejected */
+    }
+
+    ccw = copy_ccw_from_guest(ccw_addr);
+
+    /* Check for invalid command codes. */
+    if ((ccw.cmd_code & 0x0f) == 0) {
+        return -EINVAL;
+    }
+    if (((ccw.cmd_code & 0x0f) == CCW_CMD_TIC) &&
+        ((ccw.cmd_code & 0xf0) != 0)) {
+        return -EINVAL;
+    }
+
+    if (ccw.flags & CCW_FLAG_SUSPEND) {
+        return -ERESTART; /* caller marks the subchannel suspended */
+    }
+
+    check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); /* skipped only for SLI without DC */
+
+    /* Look at the command. */
+    switch (ccw.cmd_code) {
+    case CCW_CMD_NOOP:
+        /* Nothing to do. */
+        ret = 0;
+        break;
+    case CCW_CMD_BASIC_SENSE:
+        if (check_len) {
+            if (ccw.count != sizeof(sch->sense_data)) {
+                ret = -EINVAL;
+                break;
+            }
+        }
+        len = MIN(ccw.count, sizeof(sch->sense_data));
+        cpu_physical_memory_write(ccw.cda, sch->sense_data, len);
+        sch->curr_status.scsw.count = ccw.count - len; /* residual count */
+        memset(sch->sense_data, 0, sizeof(sch->sense_data)); /* sense data is consumed */
+        ret = 0;
+        break;
+    case CCW_CMD_SENSE_ID:
+    {
+        SenseId sense_id;
+
+        copy_sense_id_to_guest(&sense_id, &sch->id);
+        /* Sense ID information is device specific. */
+        if (check_len) {
+            if (ccw.count != sizeof(sense_id)) {
+                ret = -EINVAL;
+                break;
+            }
+        }
+        len = MIN(ccw.count, sizeof(sense_id));
+        /*
+         * Only indicate 0xff in the first sense byte if we actually
+         * have enough place to store at least bytes 0-3.
+         */
+        if (len >= 4) {
+            sense_id.reserved = 0xff;
+        } else {
+            sense_id.reserved = 0;
+        }
+        cpu_physical_memory_write(ccw.cda, &sense_id, len);
+        sch->curr_status.scsw.count = ccw.count - len; /* residual count */
+        ret = 0;
+        break;
+    }
+    case CCW_CMD_TIC:
+        if (sch->last_cmd_valid && (sch->last_cmd.cmd_code == CCW_CMD_TIC)) {
+            ret = -EINVAL; /* a TIC directly after a TIC is rejected */
+            break;
+        }
+        if (ccw.flags & (CCW_FLAG_CC | CCW_FLAG_DC)) {
+            ret = -EINVAL;
+            break;
+        }
+        sch->channel_prog = ccw.cda; /* continue at the branch target */
+        ret = -EAGAIN;
+        break;
+    default:
+        if (sch->ccw_cb) {
+            /* Handle device specific commands. */
+            ret = sch->ccw_cb(sch, ccw);
+        } else {
+            ret = -EOPNOTSUPP;
+        }
+        break;
+    }
+    sch->last_cmd = ccw; /* recorded for every outcome that reaches the switch */
+    sch->last_cmd_valid = true;
+    if (ret == 0) {
+        if (ccw.flags & CCW_FLAG_CC) {
+            sch->channel_prog += 8; /* command chaining: advance to the next ccw */
+            ret = -EAGAIN;
+        }
+    }
+
+    return ret;
+}
+
+static void sch_handle_start_func(SubchDev *sch)
+{ /* Run (or resume) the channel program and map its result onto the scsw. */
+
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    ORB *orb = sch->orb; /* only dereferenced when not resuming */
+    int path;
+    int ret;
+
+    /* Path management: In our simple css, we always choose the only path. */
+    path = 0x80;
+
+    if (!(s->ctrl & SCSW_ACTL_SUSP)) {
+        /* Look at the orb and try to execute the channel program. */
+        p->intparm = orb->intparm;
+        if (!(orb->lpm & path)) {
+            /* Generate a deferred cc 3 condition. */
+            s->flags |= SCSW_FLAGS_MASK_CC;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
+            return;
+        }
+    } else {
+        s->ctrl &= ~(SCSW_ACTL_SUSP | SCSW_ACTL_RESUME_PEND); /* resuming a suspended program */
+    }
+    sch->last_cmd_valid = false;
+    do { /* one css_interpret_ccw() call per ccw until the chain ends */
+        ret = css_interpret_ccw(sch, sch->channel_prog);
+        switch (ret) {
+        case -EAGAIN:
+            /* ccw chain, continue processing */
+            break;
+        case 0:
+            /* success */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                    SCSW_STCTL_STATUS_PEND;
+            s->dstat = SCSW_DSTAT_CHANNEL_END | SCSW_DSTAT_DEVICE_END;
+            break;
+        case -EOPNOTSUPP:
+            /* unsupported command, generate unit check (command reject) */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->dstat = SCSW_DSTAT_UNIT_CHECK;
+            /* Set sense bit 0 in ecw0. */
+            sch->sense_data[0] = 0x80;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                    SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            break;
+        case -EFAULT:
+            /* memory problem, generate channel data check */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_DATA_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                    SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            break;
+        case -EBUSY:
+            /* subchannel busy, generate deferred cc 1 */
+            s->flags &= ~SCSW_FLAGS_MASK_CC;
+            s->flags |= (1 << 8);
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            break;
+        case -ERESTART:
+            /* channel program has been suspended */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->ctrl |= SCSW_ACTL_SUSP;
+            break;
+        default:
+            /* error, generate channel program check */
+            s->ctrl &= ~SCSW_ACTL_START_PEND;
+            s->cstat = SCSW_CSTAT_PROG_CHECK;
+            s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+            s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+                    SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+            break;
+        }
+    } while (ret == -EAGAIN);
+
+}
+
+/*
+ * On real machines, this would run asynchronously to the main vcpus.
+ * We might want to make some parts of the ssch handling (interpreting
+ * read/writes) asynchronous later on if we start supporting more than
+ * our current very simple devices.
+ */
+static void do_subchannel_work(SubchDev *sch)
+{ /* Dispatch the pending function (clear, then halt, then start), then interrupt. */
+
+    SCSW *s = &sch->curr_status.scsw;
+
+    if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
+        sch_handle_clear_func(sch);
+    } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
+        sch_handle_halt_func(sch);
+    } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
+        sch_handle_start_func(sch);
+    } else {
+        /* Cannot happen. */
+        return; /* no interrupt is injected on this path */
+    }
+    css_inject_io_interrupt(sch);
+}
+
+static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src)
+{ /* Copy src into dest, converting multi-byte fields to big endian. */
+    int i;
+
+    dest->intparm = cpu_to_be32(src->intparm);
+    dest->flags = cpu_to_be16(src->flags);
+    dest->devno = cpu_to_be16(src->devno);
+    dest->lpm = src->lpm; /* fields without a conversion are copied unchanged */
+    dest->pnom = src->pnom;
+    dest->lpum = src->lpum;
+    dest->pim = src->pim;
+    dest->mbi = cpu_to_be16(src->mbi);
+    dest->pom = src->pom;
+    dest->pam = src->pam;
+    for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { /* every chpid entry */
+        dest->chpid[i] = src->chpid[i];
+    }
+    dest->chars = cpu_to_be32(src->chars);
+}
+
+static void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
+{ /* Copy src into dest, converting multi-byte fields to big endian. */
+    dest->flags = cpu_to_be16(src->flags);
+    dest->ctrl = cpu_to_be16(src->ctrl);
+    dest->cpa = cpu_to_be32(src->cpa);
+    dest->dstat = src->dstat; /* copied without conversion */
+    dest->cstat = src->cstat; /* copied without conversion */
+    dest->count = cpu_to_be16(src->count);
+}
+
+static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
+{ /* Convert a whole SCHIB (pmcw, scsw, mba, mda) for the guest. */
+    int i;
+
+    copy_pmcw_to_guest(&dest->pmcw, &src->pmcw);
+    copy_scsw_to_guest(&dest->scsw, &src->scsw);
+    dest->mba = cpu_to_be64(src->mba);
+    for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { /* mda is copied element-wise, unconverted */
+        dest->mda[i] = src->mda[i];
+    }
+}
+
+int css_do_stsch(SubchDev *sch, SCHIB *schib)
+{ /* Store sch's current status into schib in guest format; always 0. */
+    /* Use current status. */
+    copy_schib_to_guest(schib, &sch->curr_status);
+    return 0;
+}
+
+static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src)
+{ /* Copy src into dest, converting multi-byte fields from big endian. */
+    int i;
+
+    dest->intparm = be32_to_cpu(src->intparm);
+    dest->flags = be16_to_cpu(src->flags);
+    dest->devno = be16_to_cpu(src->devno);
+    dest->lpm = src->lpm; /* fields without a conversion are copied unchanged */
+    dest->pnom = src->pnom;
+    dest->lpum = src->lpum;
+    dest->pim = src->pim;
+    dest->mbi = be16_to_cpu(src->mbi);
+    dest->pom = src->pom;
+    dest->pam = src->pam;
+    for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) { /* every chpid entry */
+        dest->chpid[i] = src->chpid[i];
+    }
+    dest->chars = be32_to_cpu(src->chars);
+}
+
+static void copy_scsw_from_guest(SCSW *dest, const SCSW *src)
+{ /* Copy src into dest, converting multi-byte fields from big endian. */
+    dest->flags = be16_to_cpu(src->flags);
+    dest->ctrl = be16_to_cpu(src->ctrl);
+    dest->cpa = be32_to_cpu(src->cpa);
+    dest->dstat = src->dstat; /* copied without conversion */
+    dest->cstat = src->cstat; /* copied without conversion */
+    dest->count = be16_to_cpu(src->count);
+}
+
+static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src)
+{ /* Convert a whole guest SCHIB (pmcw, scsw, mba, mda) to host order. */
+    int i;
+
+    copy_pmcw_from_guest(&dest->pmcw, &src->pmcw);
+    copy_scsw_from_guest(&dest->scsw, &src->scsw);
+    dest->mba = be64_to_cpu(src->mba);
+    for (i = 0; i < ARRAY_SIZE(dest->mda); i++) { /* mda is copied element-wise, unconverted */
+        dest->mda[i] = src->mda[i];
+    }
+}
+
+int css_do_msch(SubchDev *sch, SCHIB *orig_schib)
+{ /* Apply the program-modifiable fields of a guest SCHIB; 0 or negative errno. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    SCHIB schib; /* host-order copy of orig_schib */
+
+    if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_DNV)) {
+        ret = 0; /* without DNV nothing is modified, yet success is reported */
+        goto out;
+    }
+
+    if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+        ret = -EINPROGRESS;
+        goto out;
+    }
+
+    if (s->ctrl &
+        (SCSW_FCTL_START_FUNC|SCSW_FCTL_HALT_FUNC|SCSW_FCTL_CLEAR_FUNC)) {
+        ret = -EBUSY; /* a function is still in progress */
+        goto out;
+    }
+
+    copy_schib_from_guest(&schib, orig_schib);
+    /* Only update the program-modifiable fields. */
+    p->intparm = schib.pmcw.intparm;
+    p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+                  PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+                  PMCW_FLAGS_MASK_MP);
+    p->flags |= schib.pmcw.flags &
+            (PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+             PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+             PMCW_FLAGS_MASK_MP);
+    p->lpm = schib.pmcw.lpm;
+    p->mbi = schib.pmcw.mbi;
+    p->pom = schib.pmcw.pom;
+    p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
+    p->chars |= schib.pmcw.chars &
+            (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
+    sch->curr_status.mba = schib.mba;
+
+    ret = 0;
+
+out:
+    return ret;
+}
+
+int css_do_xsch(SubchDev *sch)
+{ /* Cancel a start function that has not begun; 0 or negative errno. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    if (!(s->ctrl & SCSW_CTRL_MASK_FCTL) ||
+        ((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) ||
+        (!(s->ctrl &
+           (SCSW_ACTL_RESUME_PEND | SCSW_ACTL_START_PEND | SCSW_ACTL_SUSP))) ||
+        (s->ctrl & SCSW_ACTL_SUBCH_ACTIVE)) { /* nothing cancellable */
+        ret = -EINPROGRESS;
+        goto out;
+    }
+
+    if (s->ctrl & SCSW_CTRL_MASK_STCTL) {
+        ret = -EBUSY; /* status is already present */
+        goto out;
+    }
+
+    /* Cancel the current operation. */
+    s->ctrl &= ~(SCSW_FCTL_START_FUNC |
+                 SCSW_ACTL_RESUME_PEND |
+                 SCSW_ACTL_START_PEND |
+                 SCSW_ACTL_SUSP);
+    sch->channel_prog = 0x0;
+    sch->last_cmd_valid = false;
+    sch->orb = NULL;
+    s->dstat = 0;
+    s->cstat = 0;
+    ret = 0;
+
+out:
+    return ret;
+}
+
+int css_do_csch(SubchDev *sch)
+{ /* Start the clear function on sch; 0 on success, -ENODEV otherwise. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    /* Trigger the clear function. */
+    s->ctrl &= ~(SCSW_CTRL_MASK_FCTL | SCSW_CTRL_MASK_ACTL);
+    s->ctrl |= SCSW_FCTL_CLEAR_FUNC | SCSW_ACTL_CLEAR_PEND; /* was CLEAR_FUNC twice */
+
+    do_subchannel_work(sch); /* runs the clear handler and injects the interrupt */
+    ret = 0;
+
+out:
+    return ret;
+}
+
+int css_do_hsch(SubchDev *sch)
+{ /* Start the halt function on sch; 0 or negative errno. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    if (((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_STATUS_PEND) ||
+        (s->ctrl & (SCSW_STCTL_PRIMARY |
+                    SCSW_STCTL_SECONDARY |
+                    SCSW_STCTL_ALERT))) { /* status other than intermediate is present */
+        ret = -EINPROGRESS;
+        goto out;
+    }
+
+    if (s->ctrl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+        ret = -EBUSY; /* a halt or clear is already in progress */
+        goto out;
+    }
+
+    /* Trigger the halt function. */
+    s->ctrl |= SCSW_FCTL_HALT_FUNC;
+    s->ctrl &= ~SCSW_FCTL_START_FUNC;
+    if (((s->ctrl & SCSW_CTRL_MASK_ACTL) ==
+         (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) &&
+        ((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_INTERMEDIATE)) {
+        s->ctrl &= ~SCSW_STCTL_STATUS_PEND;
+    }
+    s->ctrl |= SCSW_ACTL_HALT_PEND;
+
+    do_subchannel_work(sch); /* runs the halt handler and injects the interrupt */
+    ret = 0;
+
+out:
+    return ret;
+}
+
+static void css_update_chnmon(SubchDev *sch)
+{ /* Increment sch's measurement counter in guest memory if MME is set. */
+    if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_MME)) {
+        /* Not active. */
+        return;
+    }
+    /* The counter is conveniently located at the beginning of the struct. */
+    if (sch->curr_status.pmcw.chars & PMCW_CHARS_MASK_MBFC) {
+        /* Format 1, per-subchannel area. */
+        uint32_t count; /* 32-bit counter located at mba */
+
+        count = ldl_phys(sch->curr_status.mba);
+        count++;
+        stl_phys(sch->curr_status.mba, count);
+    } else {
+        /* Format 0, global area. */
+        uint32_t offset;
+        uint16_t count; /* 16-bit counter; the increment wraps at 65535 */
+
+        offset = sch->curr_status.pmcw.mbi << 5; /* mbi selects a 32-byte slot */
+        count = lduw_phys(channel_subsys->chnmon_area + offset);
+        count++;
+        stw_phys(channel_subsys->chnmon_area + offset, count);
+    }
+}
+
+int css_do_ssch(SubchDev *sch, ORB *orb)
+{ /* Start the channel program described by orb; 0 or negative errno. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+        ret = -EINPROGRESS;
+        goto out;
+    }
+
+    if (s->ctrl & (SCSW_FCTL_START_FUNC |
+                   SCSW_FCTL_HALT_FUNC |
+                   SCSW_FCTL_CLEAR_FUNC)) {
+        ret = -EBUSY; /* a function is still in progress */
+        goto out;
+    }
+
+    /* If monitoring is active, update counter. */
+    if (channel_subsys->chnmon_active) {
+        css_update_chnmon(sch);
+    }
+    sch->orb = orb; /* NOTE(review): stores the caller's pointer; confirm its lifetime */
+    sch->channel_prog = orb->cpa;
+    /* Trigger the start function. */
+    s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND);
+    s->flags &= ~SCSW_FLAGS_MASK_PNO;
+
+    do_subchannel_work(sch); /* runs the start handler and injects the interrupt */
+    ret = 0;
+
+out:
+    return ret;
+}
+
+static void copy_irb_to_guest(IRB *dest, const IRB *src)
+{ /* Convert an IRB (scsw plus esw/ecw/emw words) to big endian. */
+    int i;
+
+    copy_scsw_to_guest(&dest->scsw, &src->scsw);
+
+    for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { /* each word is converted as 32 bits */
+        dest->esw[i] = cpu_to_be32(src->esw[i]);
+    }
+    for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) {
+        dest->ecw[i] = cpu_to_be32(src->ecw[i]);
+    }
+    for (i = 0; i < ARRAY_SIZE(dest->emw); i++) {
+        dest->emw[i] = cpu_to_be32(src->emw[i]);
+    }
+}
+
+int css_do_tsch(SubchDev *sch, IRB *target_irb)
+{ /* Store an IRB for sch; returns 0 if status was pending, 1 if not, 3 if n/a. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    uint16_t stctl;
+    uint16_t fctl;
+    uint16_t actl;
+    IRB irb;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = 3;
+        goto out;
+    }
+
+    stctl = s->ctrl & SCSW_CTRL_MASK_STCTL; /* snapshot before anything is cleared */
+    fctl = s->ctrl & SCSW_CTRL_MASK_FCTL;
+    actl = s->ctrl & SCSW_CTRL_MASK_ACTL;
+
+    /* Prepare the irb for the guest. */
+    memset(&irb, 0, sizeof(IRB));
+
+    /* Copy scsw from current status. */
+    memcpy(&irb.scsw, s, sizeof(SCSW));
+    if (stctl & SCSW_STCTL_STATUS_PEND) {
+        if (s->cstat & (SCSW_CSTAT_DATA_CHECK |
+                        SCSW_CSTAT_CHN_CTRL_CHK |
+                        SCSW_CSTAT_INTF_CTRL_CHK)) {
+            irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF;
+            irb.esw[0] = 0x04804000;
+        } else {
+            irb.esw[0] = 0x00800000;
+        }
+        /* If a unit check is pending, copy sense data. */
+        if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
+            (p->chars & PMCW_CHARS_MASK_CSENSE)) {
+            irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL;
+            memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data));
+            irb.esw[1] = 0x02000000 | (sizeof(sch->sense_data) << 8);
+        }
+    }
+    /* Store the irb to the guest. */
+    copy_irb_to_guest(target_irb, &irb);
+
+    /* Clear conditions on subchannel, if applicable. */
+    if (stctl & SCSW_STCTL_STATUS_PEND) {
+        s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+        if ((stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) ||
+            ((fctl & SCSW_FCTL_HALT_FUNC) &&
+             (actl & SCSW_ACTL_SUSP))) {
+            s->ctrl &= ~SCSW_CTRL_MASK_FCTL;
+        }
+        if (stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) {
+            s->flags &= ~SCSW_FLAGS_MASK_PNO;
+            s->ctrl &= ~(SCSW_ACTL_RESUME_PEND |
+                         SCSW_ACTL_START_PEND |
+                         SCSW_ACTL_HALT_PEND |
+                         SCSW_ACTL_CLEAR_PEND |
+                         SCSW_ACTL_SUSP);
+        } else {
+            if ((actl & SCSW_ACTL_SUSP) &&
+                (fctl & SCSW_FCTL_START_FUNC)) {
+                s->flags &= ~SCSW_FLAGS_MASK_PNO;
+                if (fctl & SCSW_FCTL_HALT_FUNC) {
+                    s->ctrl &= ~(SCSW_ACTL_RESUME_PEND |
+                                 SCSW_ACTL_START_PEND |
+                                 SCSW_ACTL_HALT_PEND |
+                                 SCSW_ACTL_CLEAR_PEND |
+                                 SCSW_ACTL_SUSP);
+                } else {
+                    s->ctrl &= ~SCSW_ACTL_RESUME_PEND;
+                }
+            }
+        }
+        /* Clear pending sense data. */
+        if (p->chars & PMCW_CHARS_MASK_CSENSE) {
+            memset(sch->sense_data, 0 , sizeof(sch->sense_data));
+        }
+    }
+
+    ret = ((stctl & SCSW_STCTL_STATUS_PEND) == 0); /* based on the snapshot taken above */
+
+out:
+    return ret;
+}
+
+static void copy_crw_to_guest(CRW *dest, const CRW *src)
+{ /* Copy a CRW, converting both 16-bit fields to big endian. */
+    dest->flags = cpu_to_be16(src->flags);
+    dest->rsid = cpu_to_be16(src->rsid);
+}
+
+int css_do_stcrw(CRW *crw)
+{ /* Pop the oldest pending CRW into crw; 0 if one was stored, 1 if none. */
+    CrwContainer *crw_cont;
+    int ret;
+
+    crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws);
+    if (crw_cont) {
+        QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling);
+        copy_crw_to_guest(crw, &crw_cont->crw);
+        g_free(crw_cont); /* the container was allocated in css_queue_crw() */
+        ret = 0;
+    } else {
+        /* List was empty, turn crw machine checks on again. */
+        memset(crw, 0, sizeof(*crw)); /* the caller receives an all-zero CRW */
+        channel_subsys->do_crw_mchk = true;
+        ret = 1;
+    }
+
+    return ret;
+}
+
+int css_do_tpi(uint64_t addr, int lowcore)
+{ /* Stub: both arguments are ignored and 0 is always returned. */
+    /* No pending interrupts for !KVM. */
+    return 0;
+}
+
+int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
+                         int rfmt, void *buf)
+{ /* Write descriptors for in-use chpids f_chpid..l_chpid to buf; returns bytes. */
+    int i, desc_size;
+    uint32_t words[8];
+    uint32_t chpid_type_word;
+    CssImage *css;
+
+    if (!m && !cssid) {
+        css = channel_subsys->css[channel_subsys->default_cssid]; /* cssid 0 with m == 0 means default */
+    } else {
+        css = channel_subsys->css[cssid];
+    }
+    if (!css) {
+        return 0; /* no such image: nothing is written */
+    }
+    desc_size = 0;
+    for (i = f_chpid; i <= l_chpid; i++) {
+        if (css->chpids[i].in_use) {
+            chpid_type_word = 0x80000000 | (css->chpids[i].type << 8) | i;
+            if (rfmt == 0) { /* 8-byte descriptor */
+                words[0] = cpu_to_be32(chpid_type_word);
+                words[1] = 0;
+                memcpy(buf + desc_size, words, 8);
+                desc_size += 8;
+            } else if (rfmt == 1) { /* 32-byte descriptor; other rfmt values write nothing */
+                words[0] = cpu_to_be32(chpid_type_word);
+                words[1] = 0;
+                words[2] = 0;
+                words[3] = 0;
+                words[4] = 0;
+                words[5] = 0;
+                words[6] = 0;
+                words[7] = 0;
+                memcpy(buf + desc_size, words, 32);
+                desc_size += 32;
+            }
+        }
+    }
+    return desc_size; /* NOTE(review): buf capacity is not checked here; confirm at the caller */
+}
+
+void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
+{ /* Switch channel monitoring on (update != 0) or off (update == 0). */
+    /* dct is currently ignored (not really meaningful for our devices) */
+    /* TODO: Don't ignore mbk. */
+    if (update && !channel_subsys->chnmon_active) {
+        /* Enable measuring. */
+        channel_subsys->chnmon_area = mbo; /* mbo is not applied if monitoring is already active */
+        channel_subsys->chnmon_active = true;
+    }
+    if (!update && channel_subsys->chnmon_active) {
+        /* Disable measuring. */
+        channel_subsys->chnmon_area = 0;
+        channel_subsys->chnmon_active = false;
+    }
+}
+
+int css_do_rsch(SubchDev *sch)
+{ /* Resume a suspended channel program; 0 or negative errno. */
+    SCSW *s = &sch->curr_status.scsw;
+    PMCW *p = &sch->curr_status.pmcw;
+    int ret;
+    /* DNV and ENA are both required; the old test only fired with both clear. */
+    if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
+        ret = -ENODEV;
+        goto out;
+    }
+
+    if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+        ret = -EINPROGRESS;
+        goto out;
+    }
+
+    if (((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) ||
+        (s->ctrl & SCSW_ACTL_RESUME_PEND) ||
+        (!(s->ctrl & SCSW_ACTL_SUSP))) { /* only a suspended start function can be resumed */
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* If monitoring is active, update counter. */
+    if (channel_subsys->chnmon_active) {
+        css_update_chnmon(sch);
+    }
+
+    s->ctrl |= SCSW_ACTL_RESUME_PEND;
+    do_subchannel_work(sch); /* the start handler clears SUSP and RESUME_PEND */
+    ret = 0;
+
+out:
+    return ret;
+}
+
+int css_do_rchp(uint8_t cssid, uint8_t chpid)
+{ /* Reset a virtual channel path by queueing CRWs; 0 or negative errno. */
+    uint8_t real_cssid;
+
+    if (cssid > channel_subsys->max_cssid) {
+        return -EINVAL;
+    }
+    if (channel_subsys->max_cssid == 0) {
+        real_cssid = channel_subsys->default_cssid; /* only the default image is addressable */
+    } else {
+        real_cssid = cssid;
+    }
+    if (!channel_subsys->css[real_cssid]) {
+        return -EINVAL;
+    }
+
+    if (!channel_subsys->css[real_cssid]->chpids[chpid].in_use) {
+        return -ENODEV;
+    }
+
+    if (!channel_subsys->css[real_cssid]->chpids[chpid].is_virtual) {
+        fprintf(stderr,
+                "rchp unsupported for non-virtual chpid %x.%02x!\n",
+                real_cssid, chpid);
+        return -ENODEV;
+    }
+
+    /* We don't really use a channel path, so we're done here. */
+    css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT,
+                  channel_subsys->max_cssid > 0 ? 1 : 0, chpid); /* chained when a second CRW follows */
+    if (channel_subsys->max_cssid > 0) {
+        css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8); /* second CRW carries the cssid */
+    }
+    return 0;
+}
+
+bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid)
+{ /* True if the set does not exist or schid lies above the last used schid. */
+    SubchSet *set;
+
+    if (cssid > MAX_CSSID || ssid > MAX_SSID || !channel_subsys->css[cssid] ||
+        !channel_subsys->css[cssid]->sch_set[ssid]) {
+        return true;
+    }
+    set = channel_subsys->css[cssid]->sch_set[ssid];
+    /* find_last_bit() takes the bitmap size in bits, so pass all of them. */
+    return schid > find_last_bit(set->schids_used, MAX_SCHID + 1);
+}
+
+static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type)
+{ /* Mark chpid in image cssid as an in-use virtual path; 0 or negative errno. */
+    CssImage *css;
+
+    trace_css_chpid_add(cssid, chpid, type);
+    if (cssid > MAX_CSSID) {
+        return -EINVAL;
+    }
+    css = channel_subsys->css[cssid];
+    if (!css) {
+        return -EINVAL; /* the image has not been created */
+    }
+    if (css->chpids[chpid].in_use) {
+        return -EEXIST;
+    }
+    css->chpids[chpid].in_use = 1;
+    css->chpids[chpid].type = type;
+    css->chpids[chpid].is_virtual = 1;
+
+    css_generate_chp_crws(cssid, chpid); /* defined outside this chunk */
+
+    return 0;
+}
+
+void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type)
+{ /* Reset sch's status to a fresh single-path subchannel on chpid. */
+    PMCW *p = &sch->curr_status.pmcw;
+    SCSW *s = &sch->curr_status.scsw;
+    int i;
+    CssImage *css = channel_subsys->css[sch->cssid];
+
+    assert(css != NULL); /* the css image must already exist */
+    memset(p, 0, sizeof(PMCW));
+    p->flags |= PMCW_FLAGS_MASK_DNV;
+    p->devno = sch->devno;
+    /* single path */
+    p->pim = 0x80;
+    p->pom = 0xff;
+    p->pam = 0x80;
+    p->chpid[0] = chpid;
+    if (!css->chpids[chpid].in_use) {
+        css_add_virtual_chpid(sch->cssid, chpid, type); /* return value is not checked */
+    }
+
+    memset(s, 0, sizeof(SCSW));
+    sch->curr_status.mba = 0;
+    for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) {
+        sch->curr_status.mda[i] = 0;
+    }
+}
+
+SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, uint16_t schid)
+{ /* Look up the subchannel for (cssid, ssid, schid); NULL if there is none. */
+    uint8_t real_cssid;
+
+    real_cssid = (!m && (cssid == 0)) ? channel_subsys->default_cssid : cssid; /* cssid 0 with m == 0 means default */
+
+    if (ssid > MAX_SSID || !channel_subsys->css[real_cssid]) { /* ssid indexes sch_set[] below */
+        return NULL;
+    }
+
+    if (!channel_subsys->css[real_cssid]->sch_set[ssid]) {
+        return NULL;
+    }
+
+    return channel_subsys->css[real_cssid]->sch_set[ssid]->sch[schid];
+}
+
+bool css_subch_visible(SubchDev *sch)
+{
+    /* Subchannel sets above max_ssid are never visible. */
+    bool ssid_ok = sch->ssid <= channel_subsys->max_ssid;
+
+    /* Images other than the default are visible only when max_cssid > 0. */
+    bool cssid_ok = (sch->cssid == channel_subsys->default_cssid) ||
+                    (channel_subsys->max_cssid > 0);
+
+    /* Both conditions must hold. */
+    return ssid_ok && cssid_ok;
+}
+
+bool css_present(uint8_t cssid)
+{ /* Report whether a css image has been created for cssid. */
+    return channel_subsys->css[cssid] ? true : false;
+}
+
+bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno)
+{ /* True if devno is marked used in the given css image and subchannel set. */
+    if (!channel_subsys->css[cssid]) {
+        return false; /* no such image */
+    }
+    if (!channel_subsys->css[cssid]->sch_set[ssid]) { /* NOTE(review): ssid is not range-checked here */
+        return false;
+    }
+
+    return !!test_bit(devno,
+                      channel_subsys->css[cssid]->sch_set[ssid]->devnos_used);
+}
+
+void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                      uint16_t devno, SubchDev *sch)
+{ /* Register sch at (cssid, ssid, schid), or deregister it when sch is NULL. */
+    CssImage *css;
+    SubchSet *s_set;
+
+    trace_css_assign_subch(sch ? "assign" : "deassign", cssid, ssid, schid,
+                           devno);
+    if (!channel_subsys->css[cssid]) {
+        fprintf(stderr,
+                "Suspicious call to %s (%x.%x.%04x) for non-existing css!\n",
+                __func__, cssid, ssid, schid);
+        return;
+    }
+    css = channel_subsys->css[cssid];
+
+    if (!css->sch_set[ssid]) {
+        css->sch_set[ssid] = g_malloc0(sizeof(SubchSet)); /* created on first use, also for a deassign */
+    }
+    s_set = css->sch_set[ssid];
+
+    s_set->sch[schid] = sch;
+    if (sch) {
+        set_bit(schid, s_set->schids_used); /* both bitmaps track the assignment */
+        set_bit(devno, s_set->devnos_used);
+    } else {
+        clear_bit(schid, s_set->schids_used);
+        clear_bit(devno, s_set->devnos_used);
+    }
+}
+/* Queue a crw; if do_crw_mchk is set, also inject a crw machine check. */
+void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid)
+{
+    CrwContainer *crw_cont;
+
+    trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : "");
+    /* TODO: Maybe use a static crw pool? */
+    crw_cont = g_try_malloc0(sizeof(CrwContainer));
+    if (!crw_cont) {
+        channel_subsys->crws_lost = true; /* flagged in the next crw (R) */
+        return;
+    }
+    crw_cont->crw.flags = (rsc << 8) | erc;
+    if (chain) {
+        crw_cont->crw.flags |= CRW_FLAGS_MASK_C;
+    }
+    crw_cont->crw.rsid = rsid;
+    if (channel_subsys->crws_lost) {
+        crw_cont->crw.flags |= CRW_FLAGS_MASK_R;
+        channel_subsys->crws_lost = false;
+    }
+
+    QTAILQ_INSERT_TAIL(&channel_subsys->pending_crws, crw_cont, sibling);
+    /* The machine check is injected once, then do_crw_mchk is cleared. */
+    if (channel_subsys->do_crw_mchk) {
+        S390CPU *cpu = s390_cpu_addr2state(0);
+
+        channel_subsys->do_crw_mchk = false;
+        /* Inject crw pending machine check. */
+        s390_crw_mchk(cpu);
+    }
+}
+/* Queue the crw(s) reporting a hotplugged or removed subchannel. */
+void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                           int hotplugged, int add)
+{
+    uint8_t guest_cssid;
+    bool chain_crw;
+    /* Adding a device that was not hotplugged queues nothing. */
+    if (add && !hotplugged) {
+        return;
+    }
+    if (channel_subsys->max_cssid == 0) {
+        /* Default cssid shows up as 0. */
+        guest_cssid = (cssid == channel_subsys->default_cssid) ? 0 : cssid;
+    } else {
+        /* Show real cssid to the guest. */
+        guest_cssid = cssid;
+    }
+    /*
+     * Only notify for higher subchannel sets/channel subsystems if the
+     * guest has enabled it.
+     */
+    if ((ssid > channel_subsys->max_ssid) ||
+        (guest_cssid > channel_subsys->max_cssid) ||
+        ((channel_subsys->max_cssid == 0) &&
+         (cssid != channel_subsys->default_cssid))) {
+        return;
+    }
+    chain_crw = (channel_subsys->max_ssid > 0) ||
+            (channel_subsys->max_cssid > 0);
+    css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, chain_crw ? 1 : 0, schid);
+    if (chain_crw) { /* the chained second crw carries cssid and ssid */
+        css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0,
+                      (guest_cssid << 8) | (ssid << 4));
+    }
+}
+/* Placeholder: no crws are generated for channel paths yet. */
+void css_generate_chp_crws(uint8_t cssid, uint8_t chpid)
+{
+    /* TODO */
+}
+/* Enable the "mcsse" facility: raise max_cssid to MAX_CSSID. Returns 0. */
+int css_enable_mcsse(void)
+{
+    trace_css_enable_facility("mcsse");
+    channel_subsys->max_cssid = MAX_CSSID;
+    return 0;
+}
+/* Enable the "mss" facility: raise max_ssid to MAX_SSID. Returns 0. */
+int css_enable_mss(void)
+{
+    trace_css_enable_facility("mss");
+    channel_subsys->max_ssid = MAX_SSID;
+    return 0;
+}
+/* Allocate the zeroed global channel_subsys; registered via machine_init(). */
+static void css_init(void)
+{
+    channel_subsys = g_malloc0(sizeof(*channel_subsys));
+    QTAILQ_INIT(&channel_subsys->pending_crws);
+    channel_subsys->do_crw_mchk = true;
+    channel_subsys->crws_lost = false;
+    channel_subsys->chnmon_active = false;
+}
+machine_init(css_init);
+/* Reset sch: PMCW back to single-path defaults, SCSW and program state cleared. */
+void css_reset_sch(SubchDev *sch)
+{
+    PMCW *p = &sch->curr_status.pmcw;
+
+    p->intparm = 0;
+    p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+                  PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+                  PMCW_FLAGS_MASK_MP | PMCW_FLAGS_MASK_TF);
+    p->flags |= PMCW_FLAGS_MASK_DNV;
+    p->devno = sch->devno;
+    p->pim = 0x80;
+    p->lpm = p->pim;
+    p->pnom = 0;
+    p->lpum = 0;
+    p->mbi = 0;
+    p->pom = 0xff;
+    p->pam = 0x80;
+    p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_XMWME |
+                  PMCW_CHARS_MASK_CSENSE);
+
+    memset(&sch->curr_status.scsw, 0, sizeof(sch->curr_status.scsw));
+    sch->curr_status.mba = 0;
+    /* Drop the channel program address, the last command and the ORB. */
+    sch->channel_prog = 0x0;
+    sch->last_cmd_valid = false;
+    sch->orb = NULL;
+}
+/* Reset global css state: monitoring, pending crws and the maximum ids. */
+void css_reset(void)
+{
+    CrwContainer *crw_cont;
+
+    /* Clean up monitoring. */
+    channel_subsys->chnmon_active = false;
+    channel_subsys->chnmon_area = 0;
+
+    /* Clear pending CRWs. */
+    while ((crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws))) {
+        QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling);
+        g_free(crw_cont);
+    }
+    channel_subsys->do_crw_mchk = true;
+    channel_subsys->crws_lost = false;
+
+    /* Reset maximum ids. */
+    channel_subsys->max_cssid = 0;
+    channel_subsys->max_ssid = 0;
+}
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
new file mode 100644
index 0000000..85ed05d
--- /dev/null
+++ b/hw/s390x/css.h
@@ -0,0 +1,99 @@
+/*
+ * Channel subsystem structures and definitions.
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef CSS_H
+#define CSS_H
+
+#include "ioinst.h"
+
+/* Channel subsystem constants. */
+#define MAX_SCHID 65535
+#define MAX_SSID 3
+#define MAX_CSSID 254 /* 255 is reserved */
+#define MAX_CHPID 255
+
+#define MAX_CIWS 62
+/* One CIW entry, as carried in the extended part of SenseId. */
+typedef struct CIW {
+    uint8_t type;
+    uint8_t command;
+    uint16_t count;
+} QEMU_PACKED CIW;
+/* Packed sense-id data: fixed common part plus up to MAX_CIWS CIWs. */
+typedef struct SenseId {
+    /* common part */
+    uint8_t reserved;        /* always 0x'FF' */
+    uint16_t cu_type;        /* control unit type */
+    uint8_t cu_model;        /* control unit model */
+    uint16_t dev_type;       /* device type */
+    uint8_t dev_model;       /* device model */
+    uint8_t unused;          /* padding byte */
+    /* extended part */
+    CIW ciw[MAX_CIWS];       /* variable # of CIWs */
+} QEMU_PACKED SenseId;
+/* Measurement block with 16-bit counts and 32-bit time fields (packed). */
+/* Channel measurements, from linux/drivers/s390/cio/cmf.c. */
+typedef struct CMB {
+    uint16_t ssch_rsch_count;
+    uint16_t sample_count;
+    uint32_t device_connect_time;
+    uint32_t function_pending_time;
+    uint32_t device_disconnect_time;
+    uint32_t control_unit_queuing_time;
+    uint32_t device_active_only_time;
+    uint32_t reserved[2];
+} QEMU_PACKED CMB;
+/* Like CMB, but with 32-bit counts and two additional time fields. */
+typedef struct CMBE {
+    uint32_t ssch_rsch_count;
+    uint32_t sample_count;
+    uint32_t device_connect_time;
+    uint32_t function_pending_time;
+    uint32_t device_disconnect_time;
+    uint32_t control_unit_queuing_time;
+    uint32_t device_active_only_time;
+    uint32_t device_busy_time;
+    uint32_t initial_command_response_time;
+    uint32_t reserved[7];
+} QEMU_PACKED CMBE;
+/* Per-subchannel state: css bookkeeping plus data set up by the transport. */
+struct SubchDev {
+    /* channel-subsystem related things: */
+    uint8_t cssid;
+    uint8_t ssid;
+    uint16_t schid;
+    uint16_t devno;
+    SCHIB curr_status;
+    uint8_t sense_data[32];
+    hwaddr channel_prog;
+    CCW1 last_cmd;
+    bool last_cmd_valid;
+    ORB *orb;
+    /* transport-provided data: */
+    int (*ccw_cb) (SubchDev *, CCW1);
+    SenseId id;
+    void *driver_data;
+};
+
+typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid,
+                                       uint16_t schid);
+int css_create_css_image(uint8_t cssid, bool default_image);
+bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno);
+void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                      uint16_t devno, SubchDev *sch);
+void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type);
+void css_reset(void);
+void css_reset_sch(SubchDev *sch);
+void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid);
+void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
+                           int hotplugged, int add);
+void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
+#endif
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 76a822c..778065c 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -399,6 +399,30 @@ void cpu_unlock(void);
 
 typedef struct SubchDev SubchDev;
 
+#ifndef CONFIG_USER_ONLY
+SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
+                         uint16_t schid);
+bool css_subch_visible(SubchDev *sch);
+void css_conditional_io_interrupt(SubchDev *sch);
+int css_do_stsch(SubchDev *sch, SCHIB *schib);
+bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid);
+int css_do_msch(SubchDev *sch, SCHIB *schib);
+int css_do_xsch(SubchDev *sch);
+int css_do_csch(SubchDev *sch);
+int css_do_hsch(SubchDev *sch);
+int css_do_ssch(SubchDev *sch, ORB *orb);
+int css_do_tsch(SubchDev *sch, IRB *irb);
+int css_do_stcrw(CRW *crw);
+int css_do_tpi(uint64_t addr, int lowcore);
+int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
+                         int rfmt, void *buf);
+void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo);
+int css_enable_mcsse(void);
+int css_enable_mss(void);
+int css_do_rsch(SubchDev *sch);
+int css_do_rchp(uint8_t cssid, uint8_t chpid);
+bool css_present(uint8_t cssid);
+#else
 static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
                                        uint16_t schid)
 {
@@ -479,6 +503,7 @@ static inline bool css_present(uint8_t cssid)
 {
     return false;
 }
+#endif
 
 static inline void cpu_set_tls(CPUS390XState *env, target_ulong newtls)
 {
@@ -1031,4 +1056,41 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen);
 void QEMU_NORETURN runtime_exception(CPUS390XState *env, int excp,
                                      uintptr_t retaddr);
 
+#include <sysemu/kvm.h>
+/* Empty placeholders for the KVM I/O interrupt and crw machine check paths. */
+static inline void kvm_s390_io_interrupt(S390CPU *cpu,
+                                        uint16_t subchannel_id,
+                                        uint16_t subchannel_nr,
+                                        uint32_t io_int_parm,
+                                        uint32_t io_int_word)
+{
+}
+static inline void kvm_s390_crw_mchk(S390CPU *cpu)
+{
+}
+/* Deliver an I/O interrupt through KVM if enabled, else cpu_inject_io(). */
+static inline void s390_io_interrupt(S390CPU *cpu,
+                                     uint16_t subchannel_id,
+                                     uint16_t subchannel_nr,
+                                     uint32_t io_int_parm,
+                                     uint32_t io_int_word)
+{
+    if (kvm_enabled()) {
+        kvm_s390_io_interrupt(cpu, subchannel_id, subchannel_nr, io_int_parm,
+                              io_int_word);
+    } else {
+        cpu_inject_io(&cpu->env, subchannel_id, subchannel_nr, io_int_parm,
+                      io_int_word);
+    }
+}
+/* Raise a crw machine check through KVM if enabled, else inject it directly. */
+static inline void s390_crw_mchk(S390CPU *cpu)
+{
+    if (kvm_enabled()) {
+        kvm_s390_crw_mchk(cpu);
+    } else {
+        cpu_inject_crw_mchk(&cpu->env);
+    }
+}
+
 #endif
diff --git a/trace-events b/trace-events
index b680194..71a1111 100644
--- a/trace-events
+++ b/trace-events
@@ -1078,3 +1078,11 @@ ioinst(const char *insn) "IOINST: %s"
 ioinst_sch_id(const char *insn, int cssid, int ssid, int schid) "IOINST: %s (%x.%x.%04x)"
 ioinst_chp_id(const char *insn, int cssid, int chpid) "IOINST: %s (%x.%02x)"
 ioinst_chsc_cmd(uint16_t cmd, uint16_t len) "IOINST: chsc command %04x, len %04x"
+
+# hw/s390x/css.c
+css_enable_facility(const char *facility) "CSS: enable %s"
+css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char *chained) "CSS: queueing crw: rsc=%x, erc=%x, rsid=%x %s"
+css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type) "CSS: add chpid %x.%02x (type %02x)"
+css_new_image(uint8_t cssid, const char *default_cssid) "CSS: add css image %02x %s"
+css_assign_subch(const char *do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno) "CSS: %s %x.%x.%04x (devno %04x)"
+css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char *conditional) "CSS: I/O interrupt on sch %x.%x.%04x (intparm %08x, isc %x) %s"
commit 7b18aad543300de5da88efef8e4116a3ccbbf897
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:05 2013 +0000

    s390: Add channel I/O instructions.
    
    Provide handlers for (most) channel I/O instructions.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 3e00d38..76a822c 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -147,6 +147,9 @@ static inline void cpu_clone_regs(CPUS390XState *env, target_ulong newsp)
 }
 #endif
 
+/* distinguish between 24 bit and 31 bit addressing */
+#define HIGH_ORDER_BIT 0x80000000
+
 /* Interrupt Codes */
 /* Program Interrupts */
 #define PGM_OPERATION                   0x0001
@@ -331,6 +334,20 @@ void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
                                    int is_write);
 void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len,
                                     int is_write);
+static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb)
+{
+    hwaddr addr = 0;
+    uint8_t reg;
+    /* Top 4 bits of ipb: base register (0 = none); next 12: displacement. */
+    reg = ipb >> 28;
+    if (reg > 0) {
+        addr = env->regs[reg];
+    }
+    addr += (ipb >> 16) & 0xfff;
+
+    return addr; /* base register contents (if any) plus displacement */
+}
+
 void s390x_tod_timer(void *opaque);
 void s390x_cpu_timer(void *opaque);
 
@@ -380,6 +397,89 @@ static inline unsigned s390_del_running_cpu(CPUS390XState *env)
 void cpu_lock(void);
 void cpu_unlock(void);
 
+typedef struct SubchDev SubchDev;
+
+static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
+                                       uint16_t schid)
+{
+    return NULL; /* stub: no subchannel is ever found */
+}
+static inline bool css_subch_visible(SubchDev *sch)
+{
+    return false; /* stub: no subchannel is visible */
+}
+static inline void css_conditional_io_interrupt(SubchDev *sch)
+{ /* stub: nothing is done */
+}
+static inline int css_do_stsch(SubchDev *sch, SCHIB *schib)
+{
+    return -ENODEV; /* stub: nothing is stored into schib */
+}
+static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid)
+{
+    return true; /* stub: makes ioinst_handle_stsch() answer cc 3 */
+}
+static inline int css_do_msch(SubchDev *sch, SCHIB *schib)
+{
+    return -ENODEV; /* stub; ioinst_handle_msch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_xsch(SubchDev *sch)
+{
+    return -ENODEV; /* stub; ioinst_handle_xsch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_csch(SubchDev *sch)
+{
+    return -ENODEV; /* stub; ioinst_handle_csch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_hsch(SubchDev *sch)
+{
+    return -ENODEV; /* stub; ioinst_handle_hsch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_ssch(SubchDev *sch, ORB *orb)
+{
+    return -ENODEV; /* stub; ioinst_handle_ssch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_tsch(SubchDev *sch, IRB *irb)
+{
+    return -ENODEV; /* stub; not reached while css_find_subch() gives NULL */
+}
+static inline int css_do_stcrw(CRW *crw)
+{
+    return 1; /* stub: cc 1, and crw is left untouched */
+}
+static inline int css_do_tpi(uint64_t addr, int lowcore)
+{
+    return 0; /* stub; presumably "nothing pending" - TODO confirm */
+}
+static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid,
+                                       uint8_t l_chpid, int rfmt, void *buf)
+{ /* Stub. Argument order is (first chpid, last chpid, rfmt) as callers pass. */
+    return 0; /* descriptor size 0: nothing is written to buf */
+}
+static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
+{ /* stub: the measurement settings are ignored */
+}
+static inline int css_enable_mss(void)
+{
+    return -EINVAL; /* stub; chsc sda then answers response code 0x0101 */
+}
+static inline int css_enable_mcsse(void)
+{
+    return -EINVAL; /* stub; chsc sda then answers response code 0x0101 */
+}
+static inline int css_do_rsch(SubchDev *sch)
+{
+    return -ENODEV; /* stub; ioinst_handle_rsch() maps -ENODEV to cc 3 */
+}
+static inline int css_do_rchp(uint8_t cssid, uint8_t chpid)
+{
+    return -ENODEV; /* stub; ioinst_handle_rchp() maps -ENODEV to cc 3 */
+}
+static inline bool css_present(uint8_t cssid) /* stub: no css image exists */
+{
+    return false;
+}
+
 static inline void cpu_set_tls(CPUS390XState *env, target_ulong newtls)
 {
     env->aregs[0] = newtls >> 32;
diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c
index 06a16ee..4ef2d73 100644
--- a/target-s390x/ioinst.c
+++ b/target-s390x/ioinst.c
@@ -13,6 +13,7 @@
 
 #include "cpu.h"
 #include "ioinst.h"
+#include "trace.h"
 
 int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
                                  int *schid)
@@ -34,3 +35,718 @@ int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
     *schid = IOINST_SCHID_NR(value);
     return 0;
 }
+/* xsch handler: returns the cc, or -EIO after raising a program interrupt. */
+int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    int ret = -ENODEV; /* kept when the subchannel is absent or not visible */
+    int cc;
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("xsch", cssid, ssid, schid);
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_xsch(sch);
+    }
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EBUSY:
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        cc = 1;
+        break;
+    }
+
+    return cc;
+}
+/* csch handler: cc 3 if css_do_csch() was not run or gave -ENODEV, else 0. */
+int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    int ret = -ENODEV; /* kept when the subchannel is absent or not visible */
+    int cc;
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("csch", cssid, ssid, schid);
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_csch(sch);
+    }
+    if (ret == -ENODEV) {
+        cc = 3;
+    } else {
+        cc = 0;
+    }
+    return cc;
+}
+/* hsch handler: returns the cc, or -EIO after raising a program interrupt. */
+int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    int ret = -ENODEV; /* kept when the subchannel is absent or not visible */
+    int cc;
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("hsch", cssid, ssid, schid);
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_hsch(sch);
+    }
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EBUSY:
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        cc = 1;
+        break;
+    }
+
+    return cc;
+}
+/* Returns 1 if schib sets no invalid PMCW flag/chars bits and no XMWME. */
+static int ioinst_schib_valid(SCHIB *schib)
+{
+    if ((schib->pmcw.flags & PMCW_FLAGS_MASK_INVALID) ||
+        (schib->pmcw.chars & PMCW_CHARS_MASK_INVALID)) {
+        return 0;
+    }
+    /* Disallow extended measurements for now. */
+    if (schib->pmcw.chars & PMCW_CHARS_MASK_XMWME) {
+        return 0;
+    }
+    return 1;
+}
+/* msch handler: validate the guest SCHIB and hand it to css_do_msch(). */
+int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    SCHIB *schib;
+    uint64_t addr;
+    int ret = -ENODEV;
+    int cc;
+    hwaddr len = sizeof(*schib);
+    /* NOTE(review): schib is used as mapped, without byte swapping - confirm. */
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("msch", cssid, ssid, schid);
+    addr = decode_basedisp_s(env, ipb);
+    schib = s390_cpu_physical_memory_map(env, addr, &len, 0);
+    if (!schib || len != sizeof(*schib)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        cc = -EIO;
+        goto out;
+    }
+    if (!ioinst_schib_valid(schib)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        cc = -EIO;
+        goto out;
+    }
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_msch(sch, schib);
+    }
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EBUSY:
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        cc = 1;
+        break;
+    }
+out:
+    s390_cpu_physical_memory_unmap(env, schib, len, 0);
+    return cc; /* condition code, or -EIO after a program interrupt */
+}
+/* Copy an ORB from guest memory, converting big-endian fields to host order. */
+static void copy_orb_from_guest(ORB *dest, const ORB *src)
+{
+    dest->intparm = be32_to_cpu(src->intparm);
+    dest->ctrl0 = be16_to_cpu(src->ctrl0);
+    dest->lpm = src->lpm;
+    dest->ctrl1 = src->ctrl1;
+    dest->cpa = be32_to_cpu(src->cpa);
+}
+/* Returns 1 if orb sets no invalid control bits and cpa has bit 31 clear. */
+static int ioinst_orb_valid(ORB *orb)
+{
+    if ((orb->ctrl0 & ORB_CTRL0_MASK_INVALID) ||
+        (orb->ctrl1 & ORB_CTRL1_MASK_INVALID)) {
+        return 0;
+    }
+    if ((orb->cpa & HIGH_ORDER_BIT) != 0) {
+        return 0;
+    }
+    return 1;
+}
+/* ssch handler: validate the guest ORB and hand a copy to css_do_ssch(). */
+int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    ORB *orig_orb, orb;
+    uint64_t addr;
+    int ret = -ENODEV;
+    int cc;
+    hwaddr len = sizeof(*orig_orb);
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("ssch", cssid, ssid, schid);
+    addr = decode_basedisp_s(env, ipb);
+    orig_orb = s390_cpu_physical_memory_map(env, addr, &len, 0);
+    if (!orig_orb || len != sizeof(*orig_orb)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        cc = -EIO;
+        goto out;
+    }
+    copy_orb_from_guest(&orb, orig_orb); /* host-endian local copy */
+    if (!ioinst_orb_valid(&orb)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        cc = -EIO;
+        goto out;
+    }
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_ssch(sch, &orb);
+    }
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EBUSY:
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        cc = 1;
+        break;
+    }
+
+out:
+    s390_cpu_physical_memory_unmap(env, orig_orb, len, 0);
+    return cc; /* condition code, or -EIO after a program interrupt */
+}
+/* stcrw handler: let css_do_stcrw() fill the CRW mapped at the operand. */
+int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb)
+{
+    CRW *crw;
+    uint64_t addr;
+    int cc;
+    hwaddr len = sizeof(*crw);
+
+    addr = decode_basedisp_s(env, ipb);
+    crw = s390_cpu_physical_memory_map(env, addr, &len, 1);
+    if (!crw || len != sizeof(*crw)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        cc = -EIO;
+        goto out;
+    }
+    cc = css_do_stcrw(crw);
+    /* 0 - crw stored, 1 - zeroes stored */
+out:
+    s390_cpu_physical_memory_unmap(env, crw, len, 1);
+    return cc;
+}
+/* stsch handler: store the subchannel's SCHIB, or zeroes, at the operand. */
+int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    uint64_t addr;
+    int cc;
+    SCHIB *schib;
+    hwaddr len = sizeof(*schib);
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("stsch", cssid, ssid, schid);
+    addr = decode_basedisp_s(env, ipb);
+    schib = s390_cpu_physical_memory_map(env, addr, &len, 1);
+    if (!schib || len != sizeof(*schib)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        cc = -EIO;
+        goto out;
+    }
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch) {
+        if (css_subch_visible(sch)) {
+            css_do_stsch(sch, schib); /* its return value is not used */
+            cc = 0;
+        } else {
+            /* Indicate no more subchannels in this css/ss */
+            cc = 3;
+        }
+    } else {
+        if (css_schid_final(cssid, ssid, schid)) {
+            cc = 3; /* No more subchannels in this css/ss */
+        } else {
+            /* Store an empty schib. */
+            memset(schib, 0, sizeof(*schib));
+            cc = 0;
+        }
+    }
+out:
+    s390_cpu_physical_memory_unmap(env, schib, len, 1);
+    return cc;
+}
+/* tsch handler: let css_do_tsch() fill the IRB mapped at the operand. */
+int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    IRB *irb;
+    uint64_t addr;
+    int ret = -ENODEV;
+    int cc;
+    hwaddr len = sizeof(*irb);
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("tsch", cssid, ssid, schid);
+    addr = decode_basedisp_s(env, ipb);
+    irb = s390_cpu_physical_memory_map(env, addr, &len, 1);
+    if (!irb || len != sizeof(*irb)) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        cc = -EIO;
+        goto out;
+    }
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_tsch(sch, irb);
+        /* 0 - status pending, 1 - not status pending */
+        cc = ret;
+    } else {
+        cc = 3;
+    }
+out: /* unmap what was mapped: len may be short if the map attempt failed */
+    s390_cpu_physical_memory_unmap(env, irb, len, 1);
+    return cc;
+}
+/* Start of a chsc request block in guest memory (fields are big endian). */
+typedef struct ChscReq {
+    uint16_t len;
+    uint16_t command;
+    uint32_t param0;
+    uint32_t param1;
+    uint32_t param2;
+} QEMU_PACKED ChscReq;
+/* chsc response block header; data[] takes the command-specific payload. */
+typedef struct ChscResp {
+    uint16_t len;
+    uint16_t code;
+    uint32_t param;
+    char data[0];
+} QEMU_PACKED ChscResp;
+
+#define CHSC_MIN_RESP_LEN 0x0008
+
+#define CHSC_SCPD 0x0002
+#define CHSC_SCSC 0x0010
+#define CHSC_SDA  0x0031
+/* chsc command 0x0002 (scpd): request field masks and the handler. */
+#define CHSC_SCPD_0_M 0x20000000
+#define CHSC_SCPD_0_C 0x10000000
+#define CHSC_SCPD_0_FMT 0x0f000000
+#define CHSC_SCPD_0_CSSID 0x00ff0000
+#define CHSC_SCPD_0_RFMT 0x00000f00
+#define CHSC_SCPD_0_RES 0xc000f000
+#define CHSC_SCPD_1_RES 0xffffff00
+#define CHSC_SCPD_01_CHPID 0x000000ff
+static void ioinst_handle_chsc_scpd(ChscReq *req, ChscResp *res)
+{
+    uint16_t len = be16_to_cpu(req->len);
+    uint32_t param0 = be32_to_cpu(req->param0);
+    uint32_t param1 = be32_to_cpu(req->param1);
+    uint16_t resp_code;
+    int rfmt;
+    uint16_t cssid;
+    uint8_t f_chpid, l_chpid;
+    int desc_size;
+    int m;
+    /* For rfmt 0 and 1 the format is taken from the C bit instead. */
+    rfmt = (param0 & CHSC_SCPD_0_RFMT) >> 8;
+    if ((rfmt == 0) ||  (rfmt == 1)) {
+        rfmt = !!(param0 & CHSC_SCPD_0_C);
+    }
+    if ((len != 0x0010) || (param0 & CHSC_SCPD_0_RES) ||
+        (param1 & CHSC_SCPD_1_RES) || req->param2) {
+        resp_code = 0x0003;
+        goto out_err;
+    }
+    if (param0 & CHSC_SCPD_0_FMT) {
+        resp_code = 0x0007;
+        goto out_err;
+    }
+    cssid = (param0 & CHSC_SCPD_0_CSSID) >> 16;
+    m = param0 & CHSC_SCPD_0_M;
+    if (cssid != 0) { /* a non-zero cssid needs m set and an existing image */
+        if (!m || !css_present(cssid)) {
+            resp_code = 0x0008;
+            goto out_err;
+        }
+    }
+    f_chpid = param0 & CHSC_SCPD_01_CHPID;
+    l_chpid = param1 & CHSC_SCPD_01_CHPID;
+    if (l_chpid < f_chpid) {
+        resp_code = 0x0003;
+        goto out_err;
+    }
+    /* css_collect_chp_desc() is endian-aware */
+    desc_size = css_collect_chp_desc(m, cssid, f_chpid, l_chpid, rfmt,
+                                     &res->data);
+    res->code = cpu_to_be16(0x0001);
+    res->len = cpu_to_be16(8 + desc_size);
+    res->param = cpu_to_be32(rfmt);
+    return;
+
+  out_err:
+    res->code = cpu_to_be16(resp_code);
+    res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+    res->param = cpu_to_be32(rfmt);
+}
+/* chsc command 0x0010 (scsc): request field masks and the handler. */
+#define CHSC_SCSC_0_M 0x20000000
+#define CHSC_SCSC_0_FMT 0x000f0000
+#define CHSC_SCSC_0_CSSID 0x0000ff00
+#define CHSC_SCSC_0_RES 0xdff000ff
+static void ioinst_handle_chsc_scsc(ChscReq *req, ChscResp *res)
+{
+    uint16_t len = be16_to_cpu(req->len);
+    uint32_t param0 = be32_to_cpu(req->param0);
+    uint8_t cssid;
+    uint16_t resp_code;
+    uint32_t general_chars[510];
+    uint32_t chsc_chars[508];
+    /* Response data is general_chars followed directly by chsc_chars. */
+    if (len != 0x0010) {
+        resp_code = 0x0003;
+        goto out_err;
+    }
+
+    if (param0 & CHSC_SCSC_0_FMT) {
+        resp_code = 0x0007;
+        goto out_err;
+    }
+    cssid = (param0 & CHSC_SCSC_0_CSSID) >> 8;
+    if (cssid != 0) {
+        if (!(param0 & CHSC_SCSC_0_M) || !css_present(cssid)) {
+            resp_code = 0x0008;
+            goto out_err;
+        }
+    }
+    if ((param0 & CHSC_SCSC_0_RES) || req->param1 || req->param2) {
+        resp_code = 0x0003;
+        goto out_err;
+    }
+    res->code = cpu_to_be16(0x0001);
+    res->len = cpu_to_be16(4080); /* 8 header + 2040 + 2032 data bytes */
+    res->param = 0;
+
+    memset(general_chars, 0, sizeof(general_chars));
+    memset(chsc_chars, 0, sizeof(chsc_chars));
+
+    general_chars[0] = cpu_to_be32(0x03000000);
+    general_chars[1] = cpu_to_be32(0x00059000);
+
+    chsc_chars[0] = cpu_to_be32(0x40000000);
+    chsc_chars[3] = cpu_to_be32(0x00040000);
+
+    memcpy(res->data, general_chars, sizeof(general_chars));
+    memcpy(res->data + sizeof(general_chars), chsc_chars, sizeof(chsc_chars));
+    return;
+
+  out_err:
+    res->code = cpu_to_be16(resp_code);
+    res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+    res->param = 0;
+}
+/* chsc command 0x0031 (sda): request masks, operation codes and handler. */
+#define CHSC_SDA_0_FMT 0x0f000000
+#define CHSC_SDA_0_OC 0x0000ffff
+#define CHSC_SDA_0_RES 0xf0ff0000
+#define CHSC_SDA_OC_MCSSE 0x0
+#define CHSC_SDA_OC_MSS 0x2
+static void ioinst_handle_chsc_sda(ChscReq *req, ChscResp *res)
+{
+    uint16_t resp_code = 0x0001;
+    uint16_t len = be16_to_cpu(req->len);
+    uint32_t param0 = be32_to_cpu(req->param0);
+    uint16_t oc;
+    int ret;
+    /* resp_code stays 0x0001 unless one of the checks below changes it. */
+    if ((len != 0x0400) || (param0 & CHSC_SDA_0_RES)) {
+        resp_code = 0x0003;
+        goto out;
+    }
+
+    if (param0 & CHSC_SDA_0_FMT) {
+        resp_code = 0x0007;
+        goto out;
+    }
+
+    oc = param0 & CHSC_SDA_0_OC;
+    switch (oc) {
+    case CHSC_SDA_OC_MCSSE:
+        ret = css_enable_mcsse();
+        if (ret == -EINVAL) {
+            resp_code = 0x0101;
+            goto out;
+        }
+        break;
+    case CHSC_SDA_OC_MSS:
+        ret = css_enable_mss();
+        if (ret == -EINVAL) {
+            resp_code = 0x0101;
+            goto out;
+        }
+        break;
+    default:
+        resp_code = 0x0003;
+        goto out;
+    }
+
+out:
+    res->code = cpu_to_be16(resp_code);
+    res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+    res->param = 0;
+}
+/* Fill res with a minimum-length response carrying code 0x0004. */
+static void ioinst_handle_chsc_unimplemented(ChscResp *res)
+{
+    res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+    res->code = cpu_to_be16(0x0004);
+    res->param = 0;
+}
+/* chsc handler: map the request page and dispatch on the command code. */
+int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb)
+{
+    ChscReq *req;
+    ChscResp *res;
+    uint64_t addr;
+    int reg;
+    uint16_t len;
+    uint16_t command;
+    hwaddr map_size = TARGET_PAGE_SIZE;
+    int ret = 0;
+
+    trace_ioinst("chsc");
+    reg = (ipb >> 20) & 0x00f;
+    addr = env->regs[reg];
+    /* Page boundary? */
+    if (addr & 0xfff) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        return -EIO;
+    }
+    req = s390_cpu_physical_memory_map(env, addr, &map_size, 1);
+    if (!req || map_size != TARGET_PAGE_SIZE) {
+        program_interrupt(env, PGM_SPECIFICATION, 2);
+        ret = -EIO;
+        goto out;
+    }
+    len = be16_to_cpu(req->len);
+    /* Length field valid? */
+    if ((len < 16) || (len > 4088) || (len & 7)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        ret = -EIO;
+        goto out;
+    }
+    memset((char *)req + len, 0, TARGET_PAGE_SIZE - len);
+    res = (void *)((char *)req + len); /* response follows the request */
+    command = be16_to_cpu(req->command);
+    trace_ioinst_chsc_cmd(command, len);
+    switch (command) {
+    case CHSC_SCSC:
+        ioinst_handle_chsc_scsc(req, res);
+        break;
+    case CHSC_SCPD:
+        ioinst_handle_chsc_scpd(req, res);
+        break;
+    case CHSC_SDA:
+        ioinst_handle_chsc_sda(req, res);
+        break;
+    default:
+        ioinst_handle_chsc_unimplemented(res);
+        break;
+    }
+
+out:
+    s390_cpu_physical_memory_unmap(env, req, map_size, 1);
+    return ret; /* 0, or -EIO after a program interrupt */
+}
+/* tpi handler: adjust the operand address and pass it to css_do_tpi(). */
+int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb)
+{
+    uint64_t addr;
+    int lowcore;
+    /* The first 8K and the 8K block at env->psa swap places below. */
+    trace_ioinst("tpi");
+    addr = decode_basedisp_s(env, ipb);
+    lowcore = addr ? 0 : 1; /* set when the decoded operand address is 0 */
+    if (addr < 8192) {
+        addr += env->psa;
+    } else if ((env->psa <= addr) && (addr < env->psa + 8192)) {
+        addr -= env->psa;
+    }
+    return css_do_tpi(addr, lowcore);
+}
+/* schm: field extractors for register 1, followed by the handler. */
+#define SCHM_REG1_RES(_reg) (_reg & 0x000000000ffffffc)
+#define SCHM_REG1_MBK(_reg) ((_reg & 0x00000000f0000000) >> 28)
+#define SCHM_REG1_UPD(_reg) ((_reg & 0x0000000000000002) >> 1)
+#define SCHM_REG1_DCT(_reg) (_reg & 0x0000000000000001)
+
+int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2,
+                       uint32_t ipb)
+{
+    uint8_t mbk;
+    int update;
+    int dct;
+
+    trace_ioinst("schm");
+
+    if (SCHM_REG1_RES(reg1)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+
+    mbk = SCHM_REG1_MBK(reg1);
+    update = SCHM_REG1_UPD(reg1);
+    dct = SCHM_REG1_DCT(reg1);
+    /* With update set, reg2 must have its low 12 bits clear. */
+    if (update && (reg2 & 0x0000000000000fff)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+
+    css_do_schm(mbk, update, dct, update ? reg2 : 0);
+
+    return 0;
+}
+/* rsch handler: returns the cc, or -EIO after raising a program interrupt. */
+int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1)
+{
+    int cssid, ssid, schid, m;
+    SubchDev *sch;
+    int ret = -ENODEV; /* kept when the subchannel is absent or not visible */
+    int cc;
+
+    if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    trace_ioinst_sch_id("rsch", cssid, ssid, schid);
+    sch = css_find_subch(m, cssid, ssid, schid);
+    if (sch && css_subch_visible(sch)) {
+        ret = css_do_rsch(sch);
+    }
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EINVAL: /* note: cc 2 is keyed on -EINVAL here, not -EBUSY */
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        cc = 1;
+        break;
+    }
+
+    return cc;
+
+}
+/* rchp: field extractors for register 1, followed by the handler. */
+#define RCHP_REG1_RES(_reg) (_reg & 0x00000000ff00ff00)
+#define RCHP_REG1_CSSID(_reg) ((_reg & 0x0000000000ff0000) >> 16)
+#define RCHP_REG1_CHPID(_reg) (_reg & 0x00000000000000ff)
+int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1)
+{
+    int cc;
+    uint8_t cssid;
+    uint8_t chpid;
+    int ret;
+    /* Reserved bits set in reg1 are rejected with an operand exception. */
+    if (RCHP_REG1_RES(reg1)) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+
+    cssid = RCHP_REG1_CSSID(reg1);
+    chpid = RCHP_REG1_CHPID(reg1);
+
+    trace_ioinst_chp_id("rchp", cssid, chpid);
+
+    ret = css_do_rchp(cssid, chpid);
+
+    switch (ret) {
+    case -ENODEV:
+        cc = 3;
+        break;
+    case -EBUSY:
+        cc = 2;
+        break;
+    case 0:
+        cc = 0;
+        break;
+    default:
+        /* Invalid channel subsystem. */
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+
+    return cc;
+}
+/* sal: validity mask for register 1, followed by the handler. */
+#define SAL_REG1_INVALID(_reg) (_reg & 0x0000000080000000)
+int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1)
+{
+    /* We do not provide address limit checking, so let's suppress it. */
+    if (SAL_REG1_INVALID(reg1) || reg1 & 0x000000000000ffff) {
+        program_interrupt(env, PGM_OPERAND, 2);
+        return -EIO;
+    }
+    return 0; /* accepted; reg1 is not stored anywhere */
+}
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
index 037aabc..a59742c 100644
--- a/target-s390x/ioinst.h
+++ b/target-s390x/ioinst.h
@@ -204,4 +204,20 @@ typedef struct CRW {
 
 int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
                                  int *schid);
+int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2,
+                       uint32_t ipb);
+int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1);
+
 #endif
diff --git a/trace-events b/trace-events
index 2b28076..b680194 100644
--- a/trace-events
+++ b/trace-events
@@ -1072,3 +1072,9 @@ xics_ics_eoi(int nr) "ics_eoi: irq %#x"
 hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
 hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
 hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+
+# target-s390x/ioinst.c
+ioinst(const char *insn) "IOINST: %s"
+ioinst_sch_id(const char *insn, int cssid, int ssid, int schid) "IOINST: %s (%x.%x.%04x)"
+ioinst_chp_id(const char *insn, int cssid, int chpid) "IOINST: %s (%x.%02x)"
+ioinst_chsc_cmd(uint16_t cmd, uint16_t len) "IOINST: chsc command %04x, len %04x"
commit 5d69c547d947798cba92d836d06f6e017ba2b19d
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:04 2013 +0000

    s390: I/O interrupt and machine check injection.
    
    I/O interrupts are queued per isc. Only crw pending machine checks
    are supported.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index c1a0040..3e00d38 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -50,6 +50,11 @@
 #define MMU_USER_IDX 1
 
 #define MAX_EXT_QUEUE 16
+#define MAX_IO_QUEUE 16
+#define MAX_MCHK_QUEUE 16
+
+#define PSW_MCHK_MASK 0x0004000000000000
+#define PSW_IO_MASK 0x0200000000000000
 
 typedef struct PSW {
     uint64_t mask;
@@ -62,6 +67,17 @@ typedef struct ExtQueue {
     uint32_t param64;
 } ExtQueue;
 
+typedef struct IOIntQueue {
+    uint16_t id;
+    uint16_t nr;
+    uint32_t parm;
+    uint32_t word;
+} IOIntQueue;
+
+typedef struct MchkQueue {
+    uint16_t type;
+} MchkQueue;
+
 typedef struct CPUS390XState {
     uint64_t regs[16];     /* GP registers */
     CPU_DoubleU fregs[16]; /* FP registers */
@@ -93,9 +109,17 @@ typedef struct CPUS390XState {
     uint64_t cregs[16]; /* control registers */
 
     ExtQueue ext_queue[MAX_EXT_QUEUE];
-    int pending_int;
+    IOIntQueue io_queue[MAX_IO_QUEUE][8];
+    MchkQueue mchk_queue[MAX_MCHK_QUEUE];
 
+    int pending_int;
     int ext_index;
+    int io_index[8];
+    int mchk_index;
+
+    uint64_t ckc;
+    uint64_t cputm;
+    uint32_t todpr;
 
     CPU_COMMON
 
@@ -375,10 +399,14 @@ void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 #define EXCP_EXT 1 /* external interrupt */
 #define EXCP_SVC 2 /* supervisor call (syscall) */
 #define EXCP_PGM 3 /* program interruption */
+#define EXCP_IO  7 /* I/O interrupt */
+#define EXCP_MCHK 8 /* machine check */
 
 #define INTERRUPT_EXT        (1 << 0)
 #define INTERRUPT_TOD        (1 << 1)
 #define INTERRUPT_CPUTIMER   (1 << 2)
+#define INTERRUPT_IO         (1 << 3)
+#define INTERRUPT_MCHK       (1 << 4)
 
 /* Program Status Word.  */
 #define S390_PSWM_REGNUM 0
@@ -841,6 +869,45 @@ static inline void cpu_inject_ext(CPUS390XState *env, uint32_t code, uint32_t pa
     cpu_interrupt(env, CPU_INTERRUPT_HARD);
 }
 
+static inline void cpu_inject_io(CPUS390XState *env, uint16_t subchannel_id,
+                                 uint16_t subchannel_number,
+                                 uint32_t io_int_parm, uint32_t io_int_word)
+{
+    int isc = ffs(io_int_word << 2) - 1;
+    /* NOTE(review): yields -1 or 2..31; io_index[] has 8 slots, no check. */
+    if (env->io_index[isc] == MAX_IO_QUEUE - 1) {
+        /* ugh - can't queue anymore. Let's drop. */
+        return;
+    }
+
+    env->io_index[isc]++;
+    assert(env->io_index[isc] < MAX_IO_QUEUE);
+    /* Store the new entry in the slot io_index[isc] now points at. */
+    env->io_queue[env->io_index[isc]][isc].id = subchannel_id;
+    env->io_queue[env->io_index[isc]][isc].nr = subchannel_number;
+    env->io_queue[env->io_index[isc]][isc].parm = io_int_parm;
+    env->io_queue[env->io_index[isc]][isc].word = io_int_word;
+
+    env->pending_int |= INTERRUPT_IO;
+    cpu_interrupt(env, CPU_INTERRUPT_HARD);
+}
+
+static inline void cpu_inject_crw_mchk(CPUS390XState *env)
+{
+    if (env->mchk_index == MAX_MCHK_QUEUE - 1) {
+        /* ugh - can't queue anymore. Let's drop. */
+        return;
+    }
+
+    env->mchk_index++;
+    assert(env->mchk_index < MAX_MCHK_QUEUE);
+    /* Type 1 is the only type do_mchk_interrupt() knows how to deliver. */
+    env->mchk_queue[env->mchk_index].type = 1;
+
+    env->pending_int |= INTERRUPT_MCHK;
+    cpu_interrupt(env, CPU_INTERRUPT_HARD);
+}
+
 static inline bool cpu_has_work(CPUState *cpu)
 {
     CPUS390XState *env = &S390_CPU(cpu)->env;
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 3109c77..857c897 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -614,12 +614,140 @@ static void do_ext_interrupt(CPUS390XState *env)
     load_psw(env, mask, addr);
 }
 
+static void do_io_interrupt(CPUS390XState *env)
+{
+    uint64_t mask, addr;
+    LowCore *lowcore;
+    IOIntQueue *q;
+    uint8_t isc;
+    int disable = 1; /* clear INTERRUPT_IO unless an entry stays queued */
+    int found = 0;   /* set once an interrupt has been delivered */
+    /* Deliver one queued, CR6-enabled I/O interrupt; lowest isc first. */
+    if (!(env->psw.mask & PSW_MASK_IO)) {
+        cpu_abort(env, "I/O int w/o I/O mask\n");
+    }
+
+    for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) {
+        if (env->io_index[isc] < 0) {
+            continue;
+        }
+        if (env->io_index[isc] >= MAX_IO_QUEUE) { /* valid: 0..MAX - 1 */
+            cpu_abort(env, "I/O queue overrun for isc %d: %d\n",
+                      isc, env->io_index[isc]);
+        }
+
+        q = &env->io_queue[env->io_index[isc]][isc];
+        if (found || !(env->cregs[6] & q->word)) {
+            disable = 0; /* entry stays queued (already delivered or masked) */
+            continue;
+        }
+        found = 1;
+        lowcore = cpu_map_lowcore(env);
+
+        lowcore->subchannel_id = cpu_to_be16(q->id);
+        lowcore->subchannel_nr = cpu_to_be16(q->nr);
+        lowcore->io_int_parm = cpu_to_be32(q->parm);
+        lowcore->io_int_word = cpu_to_be32(q->word);
+        lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env));
+        lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr);
+        mask = be64_to_cpu(lowcore->io_new_psw.mask);
+        addr = be64_to_cpu(lowcore->io_new_psw.addr);
+
+        cpu_unmap_lowcore(lowcore);
+
+        env->io_index[isc]--;
+        if (env->io_index[isc] >= 0) {
+            disable = 0;
+        }
+        /* No break: later iscs with queued entries must keep INTERRUPT_IO. */
+    }
+
+    if (disable) {
+        env->pending_int &= ~INTERRUPT_IO;
+    }
+
+    if (found) {
+        DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
+                env->psw.mask, env->psw.addr);
+        load_psw(env, mask, addr);
+    }
+}
+
+static void do_mchk_interrupt(CPUS390XState *env)
+{
+    uint64_t mask, addr;
+    LowCore *lowcore;
+    MchkQueue *q;
+    int i;
+    /* Deliver the newest queued machine check through the lowcore. */
+    if (!(env->psw.mask & PSW_MASK_MCHECK)) {
+        cpu_abort(env, "Machine check w/o mchk mask\n");
+    }
+
+    if (env->mchk_index < 0 || env->mchk_index >= MAX_MCHK_QUEUE) {
+        cpu_abort(env, "Mchk queue overrun: %d\n", env->mchk_index);
+    }
+
+    q = &env->mchk_queue[env->mchk_index];
+
+    if (q->type != 1) {
+        /* Don't know how to handle this... */
+        cpu_abort(env, "Unknown machine check type %d\n", q->type);
+    }
+    if (!(env->cregs[14] & (1 << 28))) {
+        /* CRW machine checks disabled */
+        return;
+    }
+
+    lowcore = cpu_map_lowcore(env);
+
+    for (i = 0; i < 16; i++) {
+        lowcore->floating_pt_save_area[i] = cpu_to_be64(env->fregs[i].ll);
+        lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
+        lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
+        lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
+    }
+    lowcore->prefixreg_save_area = cpu_to_be32(env->psa);
+    lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc);
+    lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr);
+    lowcore->cpu_timer_save_area[0] = cpu_to_be32(env->cputm >> 32);
+    lowcore->cpu_timer_save_area[1] = cpu_to_be32((uint32_t)env->cputm);
+    lowcore->clock_comp_save_area[0] = cpu_to_be32(env->ckc >> 32);
+    lowcore->clock_comp_save_area[1] = cpu_to_be32((uint32_t)env->ckc);
+
+    lowcore->mcck_interruption_code[0] = cpu_to_be32(0x00400f1d);
+    lowcore->mcck_interruption_code[1] = cpu_to_be32(0x40330000);
+    lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env));
+    lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr);
+    mask = be64_to_cpu(lowcore->mcck_new_psw.mask);
+    addr = be64_to_cpu(lowcore->mcck_new_psw.addr);
+
+    cpu_unmap_lowcore(lowcore);
+
+    env->mchk_index--;
+    if (env->mchk_index == -1) { /* queue drained: nothing left pending */
+        env->pending_int &= ~INTERRUPT_MCHK;
+    }
+
+    DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
+            env->psw.mask, env->psw.addr);
+
+    load_psw(env, mask, addr);
+}
+
 void do_interrupt(CPUS390XState *env)
 {
     qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
                   __func__, env->exception_index, env->psw.addr);
 
     s390_add_running_cpu(env);
+    /* handle machine checks */
+    if ((env->psw.mask & PSW_MASK_MCHECK) &&
+        (env->exception_index == -1)) {
+        if (env->pending_int & INTERRUPT_MCHK) {
+            env->exception_index = EXCP_MCHK;
+        }
+    }
     /* handle external interrupts */
     if ((env->psw.mask & PSW_MASK_EXT) &&
         env->exception_index == -1) {
@@ -638,6 +766,13 @@ void do_interrupt(CPUS390XState *env)
             env->pending_int &= ~INTERRUPT_TOD;
         }
     }
+    /* handle I/O interrupts */
+    if ((env->psw.mask & PSW_MASK_IO) &&
+        (env->exception_index == -1)) {
+        if (env->pending_int & INTERRUPT_IO) {
+            env->exception_index = EXCP_IO;
+        }
+    }
 
     switch (env->exception_index) {
     case EXCP_PGM:
@@ -649,6 +784,12 @@ void do_interrupt(CPUS390XState *env)
     case EXCP_EXT:
         do_ext_interrupt(env);
         break;
+    case EXCP_IO:
+        do_io_interrupt(env);
+        break;
+    case EXCP_MCHK:
+        do_mchk_interrupt(env);
+        break;
     }
     env->exception_index = -1;
 
commit db1c8f53bfb1b3bff4f904be4f929808a445522e
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:03 2013 +0000

    s390: Channel I/O basic definitions.
    
    Basic channel I/O structures and helper function.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs
index e728abf..3afb0b7 100644
--- a/target-s390x/Makefile.objs
+++ b/target-s390x/Makefile.objs
@@ -1,4 +1,4 @@
 obj-y += translate.o helper.o cpu.o interrupt.o
 obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o
-obj-$(CONFIG_SOFTMMU) += machine.o
+obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 7951aab..c1a0040 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -300,6 +300,7 @@ int cpu_s390x_handle_mmu_fault (CPUS390XState *env, target_ulong address, int rw
                                 int mmu_idx);
 #define cpu_handle_mmu_fault cpu_s390x_handle_mmu_fault
 
+#include "ioinst.h"
 
 #ifndef CONFIG_USER_ONLY
 void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c
new file mode 100644
index 0000000..06a16ee
--- /dev/null
+++ b/target-s390x/ioinst.c
@@ -0,0 +1,36 @@
+/*
+ * I/O instructions for S/390
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <sys/types.h>
+
+#include "cpu.h"
+#include "ioinst.h"
+
+int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
+                                 int *schid)
+{
+    if (!IOINST_SCHID_ONE(value)) {
+        return -EINVAL; /* the "one" bit (0x00010000) must be set */
+    }
+    if (!IOINST_SCHID_M(value)) {
+        if (IOINST_SCHID_CSSID(value)) {
+            return -EINVAL; /* cssid must be 0 when the m bit is clear */
+        }
+        *cssid = 0;
+        *m = 0;
+    } else {
+        *cssid = IOINST_SCHID_CSSID(value);
+        *m = 1;
+    }
+    *ssid = IOINST_SCHID_SSID(value);
+    *schid = IOINST_SCHID_NR(value);
+    return 0; /* all four output fields have been filled in */
+}
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
new file mode 100644
index 0000000..037aabc
--- /dev/null
+++ b/target-s390x/ioinst.h
@@ -0,0 +1,207 @@
+/*
+ * S/390 channel I/O instructions
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+*/
+
+#ifndef IOINST_S390X_H
+#define IOINST_S390X_H
+/*
+ * Channel I/O related definitions, as defined in the Principles
+ * Of Operation (and taken from the Linux implementation).
+ */
+
+/* subchannel status word (command mode only) */
+typedef struct SCSW {
+    uint16_t flags;
+    uint16_t ctrl;
+    uint32_t cpa;
+    uint8_t dstat;
+    uint8_t cstat;
+    uint16_t count;
+} QEMU_PACKED SCSW;
+
+#define SCSW_FLAGS_MASK_KEY 0xf000
+#define SCSW_FLAGS_MASK_SCTL 0x0800
+#define SCSW_FLAGS_MASK_ESWF 0x0400
+#define SCSW_FLAGS_MASK_CC 0x0300
+#define SCSW_FLAGS_MASK_FMT 0x0080
+#define SCSW_FLAGS_MASK_PFCH 0x0040
+#define SCSW_FLAGS_MASK_ISIC 0x0020
+#define SCSW_FLAGS_MASK_ALCC 0x0010
+#define SCSW_FLAGS_MASK_SSI 0x0008
+#define SCSW_FLAGS_MASK_ZCC 0x0004
+#define SCSW_FLAGS_MASK_ECTL 0x0002
+#define SCSW_FLAGS_MASK_PNO 0x0001
+
+#define SCSW_CTRL_MASK_FCTL 0x7000
+#define SCSW_CTRL_MASK_ACTL 0x0fe0
+#define SCSW_CTRL_MASK_STCTL 0x001f
+
+#define SCSW_FCTL_CLEAR_FUNC 0x1000
+#define SCSW_FCTL_HALT_FUNC 0x2000
+#define SCSW_FCTL_START_FUNC 0x4000
+
+#define SCSW_ACTL_SUSP 0x0020
+#define SCSW_ACTL_DEVICE_ACTIVE 0x0040
+#define SCSW_ACTL_SUBCH_ACTIVE 0x0080
+#define SCSW_ACTL_CLEAR_PEND 0x0100
+#define SCSW_ACTL_HALT_PEND  0x0200
+#define SCSW_ACTL_START_PEND 0x0400
+#define SCSW_ACTL_RESUME_PEND 0x0800
+
+#define SCSW_STCTL_STATUS_PEND 0x0001
+#define SCSW_STCTL_SECONDARY 0x0002
+#define SCSW_STCTL_PRIMARY 0x0004
+#define SCSW_STCTL_INTERMEDIATE 0x0008
+#define SCSW_STCTL_ALERT 0x0010
+
+#define SCSW_DSTAT_ATTENTION     0x80
+#define SCSW_DSTAT_STAT_MOD      0x40
+#define SCSW_DSTAT_CU_END        0x20
+#define SCSW_DSTAT_BUSY          0x10
+#define SCSW_DSTAT_CHANNEL_END   0x08
+#define SCSW_DSTAT_DEVICE_END    0x04
+#define SCSW_DSTAT_UNIT_CHECK    0x02
+#define SCSW_DSTAT_UNIT_EXCEP    0x01
+
+#define SCSW_CSTAT_PCI           0x80
+#define SCSW_CSTAT_INCORR_LEN    0x40
+#define SCSW_CSTAT_PROG_CHECK    0x20
+#define SCSW_CSTAT_PROT_CHECK    0x10
+#define SCSW_CSTAT_DATA_CHECK    0x08
+#define SCSW_CSTAT_CHN_CTRL_CHK  0x04
+#define SCSW_CSTAT_INTF_CTRL_CHK 0x02
+#define SCSW_CSTAT_CHAIN_CHECK   0x01
+
+/* path management control word */
+typedef struct PMCW {
+    uint32_t intparm;
+    uint16_t flags;
+    uint16_t devno;
+    uint8_t  lpm;
+    uint8_t  pnom;
+    uint8_t  lpum;
+    uint8_t  pim;
+    uint16_t mbi;
+    uint8_t  pom;
+    uint8_t  pam;
+    uint8_t  chpid[8];
+    uint32_t chars;
+} QEMU_PACKED PMCW;
+
+#define PMCW_FLAGS_MASK_QF 0x8000
+#define PMCW_FLAGS_MASK_W 0x4000
+#define PMCW_FLAGS_MASK_ISC 0x3800
+#define PMCW_FLAGS_MASK_ENA 0x0080
+#define PMCW_FLAGS_MASK_LM 0x0060
+#define PMCW_FLAGS_MASK_MME 0x0018
+#define PMCW_FLAGS_MASK_MP 0x0004
+#define PMCW_FLAGS_MASK_TF 0x0002
+#define PMCW_FLAGS_MASK_DNV 0x0001
+#define PMCW_FLAGS_MASK_INVALID 0x0700
+
+#define PMCW_CHARS_MASK_ST 0x00e00000
+#define PMCW_CHARS_MASK_MBFC 0x00000004
+#define PMCW_CHARS_MASK_XMWME 0x00000002
+#define PMCW_CHARS_MASK_CSENSE 0x00000001
+#define PMCW_CHARS_MASK_INVALID 0xff1ffff8
+
+/* subchannel information block */
+typedef struct SCHIB {
+    PMCW pmcw;
+    SCSW scsw;
+    uint64_t mba;
+    uint8_t mda[4];
+} QEMU_PACKED SCHIB;
+
+/* interruption response block */
+typedef struct IRB {
+    SCSW scsw;
+    uint32_t esw[5];
+    uint32_t ecw[8];
+    uint32_t emw[8];
+} QEMU_PACKED IRB;
+
+/* operation request block */
+typedef struct ORB {
+    uint32_t intparm;
+    uint16_t ctrl0;
+    uint8_t lpm;
+    uint8_t ctrl1;
+    uint32_t cpa;
+} QEMU_PACKED ORB;
+
+#define ORB_CTRL0_MASK_KEY 0xf000
+#define ORB_CTRL0_MASK_SPND 0x0800
+#define ORB_CTRL0_MASK_STR 0x0400
+#define ORB_CTRL0_MASK_MOD 0x0200
+#define ORB_CTRL0_MASK_SYNC 0x0100
+#define ORB_CTRL0_MASK_FMT 0x0080
+#define ORB_CTRL0_MASK_PFCH 0x0040
+#define ORB_CTRL0_MASK_ISIC 0x0020
+#define ORB_CTRL0_MASK_ALCC 0x0010
+#define ORB_CTRL0_MASK_SSIC 0x0008
+#define ORB_CTRL0_MASK_C64 0x0002
+#define ORB_CTRL0_MASK_I2K 0x0001
+#define ORB_CTRL0_MASK_INVALID 0x0004
+
+#define ORB_CTRL1_MASK_ILS 0x80
+#define ORB_CTRL1_MASK_MIDAW 0x40
+#define ORB_CTRL1_MASK_ORBX 0x01
+#define ORB_CTRL1_MASK_INVALID 0x3e
+
+/* channel command word (type 1) */
+typedef struct CCW1 {
+    uint8_t cmd_code;
+    uint8_t flags;
+    uint16_t count;
+    uint32_t cda;
+} QEMU_PACKED CCW1;
+
+#define CCW_FLAG_DC              0x80
+#define CCW_FLAG_CC              0x40
+#define CCW_FLAG_SLI             0x20
+#define CCW_FLAG_SKIP            0x10
+#define CCW_FLAG_PCI             0x08
+#define CCW_FLAG_IDA             0x04
+#define CCW_FLAG_SUSPEND         0x02
+
+#define CCW_CMD_NOOP             0x03
+#define CCW_CMD_BASIC_SENSE      0x04
+#define CCW_CMD_TIC              0x08
+#define CCW_CMD_SENSE_ID         0xe4
+
+typedef struct CRW {
+    uint16_t flags;
+    uint16_t rsid;
+} QEMU_PACKED CRW;
+
+#define CRW_FLAGS_MASK_S 0x4000
+#define CRW_FLAGS_MASK_R 0x2000
+#define CRW_FLAGS_MASK_C 0x1000
+#define CRW_FLAGS_MASK_RSC 0x0f00
+#define CRW_FLAGS_MASK_A 0x0080
+#define CRW_FLAGS_MASK_ERC 0x003f
+
+#define CRW_ERC_INIT 0x02
+#define CRW_ERC_IPI  0x04
+
+#define CRW_RSC_SUBCH 0x3
+#define CRW_RSC_CHP   0x4
+
+/* schid disintegration: extract the fields of a 32-bit subchannel id word */
+#define IOINST_SCHID_ONE(_schid)   (((_schid) & 0x00010000) >> 16)
+#define IOINST_SCHID_M(_schid)     (((_schid) & 0x00080000) >> 19)
+#define IOINST_SCHID_CSSID(_schid) (((_schid) & 0xff000000) >> 24)
+#define IOINST_SCHID_SSID(_schid)  (((_schid) & 0x00060000) >> 17)
+#define IOINST_SCHID_NR(_schid)    ((_schid) & 0x0000ffff)
+
+int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
+                                 int *schid);
+#endif
commit 38322ed6518817066ce3a9037fd3795af57e1cdd
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:02 2013 +0000

    s390: Add mapping helper functions.
    
    Add s390_cpu_physical_memory_{map,unmap} with special handling
    for the lowcore.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 1f2d942..7951aab 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -302,6 +302,10 @@ int cpu_s390x_handle_mmu_fault (CPUS390XState *env, target_ulong address, int rw
 
 
 #ifndef CONFIG_USER_ONLY
+void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
+                                   int is_write);
+void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len,
+                                    int is_write);
 void s390x_tod_timer(void *opaque);
 void s390x_cpu_timer(void *opaque);
 
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 023c074..3109c77 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -490,6 +490,31 @@ static void cpu_unmap_lowcore(LowCore *lowcore)
     cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore));
 }
 
+void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
+                                   int is_write)
+{
+    hwaddr start = addr;
+
+    /* Mind the prefix area: [0, 8192) and [psa, psa + 8192) are swapped. */
+    if (addr < 8192) {
+        /* Map the lowcore; clamp *len so the mapping stays inside it. */
+        start += env->psa;
+        *len = MIN(*len, 8192 - addr);
+    } else if ((addr >= env->psa) && (addr < env->psa + 8192)) {
+        /* Map the 0 page; clamp *len to the end of that 8192-byte area. */
+        start -= env->psa;
+        *len = MIN(*len, 8192 - start);
+    }
+
+    return cpu_physical_memory_map(start, len, is_write);
+}
+
+void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len,
+                                    int is_write)
+{
+    cpu_physical_memory_unmap(addr, len, is_write, len);
+}
+
 static void do_svc_interrupt(CPUS390XState *env)
 {
     uint64_t mask, addr;
commit 4782a23b270ecbb9ce0ca6f3f1b60857a09cef0e
Author: Cornelia Huck <cornelia.huck at de.ibm.com>
Date:   Thu Jan 24 02:28:01 2013 +0000

    s390: Lowcore mapping helper.
    
    Create a lowcore mapping helper that includes a check for sufficient
    length.
    
    Signed-off-by: Cornelia Huck <cornelia.huck at de.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 9a132e6..023c074 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -471,13 +471,31 @@ static uint64_t get_psw_mask(CPUS390XState *env)
     return r;
 }
 
+static LowCore *cpu_map_lowcore(CPUS390XState *env)
+{
+    LowCore *lowcore;
+    hwaddr len = sizeof(LowCore);
+    /* Request a writable mapping of sizeof(LowCore) bytes at env->psa. */
+    lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+    /* Abort if len comes back smaller than a full LowCore. */
+    if (len < sizeof(LowCore)) {
+        cpu_abort(env, "Could not map lowcore\n");
+    }
+
+    return lowcore;
+}
+
+static void cpu_unmap_lowcore(LowCore *lowcore)
+{
+    cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore));
+}
+
 static void do_svc_interrupt(CPUS390XState *env)
 {
     uint64_t mask, addr;
     LowCore *lowcore;
-    hwaddr len = TARGET_PAGE_SIZE;
 
-    lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+    lowcore = cpu_map_lowcore(env);
 
     lowcore->svc_code = cpu_to_be16(env->int_svc_code);
     lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen);
@@ -486,7 +504,7 @@ static void do_svc_interrupt(CPUS390XState *env)
     mask = be64_to_cpu(lowcore->svc_new_psw.mask);
     addr = be64_to_cpu(lowcore->svc_new_psw.addr);
 
-    cpu_physical_memory_unmap(lowcore, len, 1, len);
+    cpu_unmap_lowcore(lowcore);
 
     load_psw(env, mask, addr);
 }
@@ -495,7 +513,6 @@ static void do_program_interrupt(CPUS390XState *env)
 {
     uint64_t mask, addr;
     LowCore *lowcore;
-    hwaddr len = TARGET_PAGE_SIZE;
     int ilen = env->int_pgm_ilen;
 
     switch (ilen) {
@@ -513,7 +530,7 @@ static void do_program_interrupt(CPUS390XState *env)
     qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n",
                   __func__, env->int_pgm_code, ilen);
 
-    lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+    lowcore = cpu_map_lowcore(env);
 
     lowcore->pgm_ilen = cpu_to_be16(ilen);
     lowcore->pgm_code = cpu_to_be16(env->int_pgm_code);
@@ -522,7 +539,7 @@ static void do_program_interrupt(CPUS390XState *env)
     mask = be64_to_cpu(lowcore->program_new_psw.mask);
     addr = be64_to_cpu(lowcore->program_new_psw.addr);
 
-    cpu_physical_memory_unmap(lowcore, len, 1, len);
+    cpu_unmap_lowcore(lowcore);
 
     DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__,
             env->int_pgm_code, ilen, env->psw.mask,
@@ -537,7 +554,6 @@ static void do_ext_interrupt(CPUS390XState *env)
 {
     uint64_t mask, addr;
     LowCore *lowcore;
-    hwaddr len = TARGET_PAGE_SIZE;
     ExtQueue *q;
 
     if (!(env->psw.mask & PSW_MASK_EXT)) {
@@ -549,7 +565,7 @@ static void do_ext_interrupt(CPUS390XState *env)
     }
 
     q = &env->ext_queue[env->ext_index];
-    lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+    lowcore = cpu_map_lowcore(env);
 
     lowcore->ext_int_code = cpu_to_be16(q->code);
     lowcore->ext_params = cpu_to_be32(q->param);
@@ -560,7 +576,7 @@ static void do_ext_interrupt(CPUS390XState *env)
     mask = be64_to_cpu(lowcore->external_new_psw.mask);
     addr = be64_to_cpu(lowcore->external_new_psw.addr);
 
-    cpu_physical_memory_unmap(lowcore, len, 1, len);
+    cpu_unmap_lowcore(lowcore);
 
     env->ext_index--;
     if (env->ext_index == -1) {
commit 3ef669e19401b3e504f0bd1ca3113c3aeacd4bed
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Jan 24 12:18:52 2013 +0100

    s390: Add default support for SCLP console
    
    The current s390 machine uses the virtio console as default console,
    but this doesn't mean that we always want to keep it that way for new
    machines.
    
    This patch introduces a way for a machine type to specify that it wants
    the default console to be an SCLP console, which is a lot closer to what
    real hardware does.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Reviewed-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/boards.h b/hw/boards.h
index 3ff9665..3813d4e 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -33,6 +33,7 @@ typedef struct QEMUMachine {
     unsigned int no_serial:1,
         no_parallel:1,
         use_virtcon:1,
+        use_sclp:1,
         no_floppy:1,
         no_cdrom:1,
         no_sdcard:1;
diff --git a/vl.c b/vl.c
index 7aab73b..8b0961e 100644
--- a/vl.c
+++ b/vl.c
@@ -176,6 +176,7 @@ int main(int argc, char **argv)
 #define DEFAULT_RAM_SIZE 128
 
 #define MAX_VIRTIO_CONSOLES 1
+#define MAX_SCLP_CONSOLES 1
 
 static const char *data_dir;
 const char *bios_name = NULL;
@@ -203,6 +204,7 @@ int no_quit = 0;
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *sclp_hds[MAX_SCLP_CONSOLES];
 int win2k_install_hack = 0;
 int singlestep = 0;
 int smp_cpus = 1;
@@ -271,6 +273,7 @@ static int tcg_tb_size;
 static int default_serial = 1;
 static int default_parallel = 1;
 static int default_virtcon = 1;
+static int default_sclp = 1;
 static int default_monitor = 1;
 static int default_floppy = 1;
 static int default_cdrom = 1;
@@ -2340,6 +2343,7 @@ struct device_config {
         DEV_VIRTCON,   /* -virtioconsole */
         DEV_DEBUGCON,  /* -debugcon */
         DEV_GDB,       /* -gdb, -s */
+        DEV_SCLP,      /* s390 sclp */
     } type;
     const char *cmdline;
     Location loc;
@@ -2458,6 +2462,39 @@ static int virtcon_parse(const char *devname)
     return 0;
 }
 
+static int sclp_parse(const char *devname)
+{
+    QemuOptsList *device = qemu_find_opts("device");
+    static int index = 0; /* number of sclp consoles set up so far */
+    char label[32];
+    QemuOpts *dev_opts;
+
+    if (strcmp(devname, "none") == 0) {
+        return 0; /* "none": succeed without creating anything */
+    }
+    if (index == MAX_SCLP_CONSOLES) {
+        fprintf(stderr, "qemu: too many sclp consoles\n");
+        exit(1);
+    }
+
+    assert(arch_type == QEMU_ARCH_S390X);
+    /* Build "-device" options for an sclpconsole tied to chardev <label>. */
+    dev_opts = qemu_opts_create(device, NULL, 0, NULL);
+    qemu_opt_set(dev_opts, "driver", "sclpconsole");
+
+    snprintf(label, sizeof(label), "sclpcon%d", index);
+    sclp_hds[index] = qemu_chr_new(label, devname, NULL);
+    if (!sclp_hds[index]) {
+        fprintf(stderr, "qemu: could not connect sclp console"
+                " to character backend '%s'\n", devname);
+        return -1;
+    }
+    qemu_opt_set(dev_opts, "chardev", label);
+
+    index++;
+    return 0;
+}
+
 static int debugcon_parse(const char *devname)
 {   
     QemuOpts *opts;
@@ -3832,6 +3869,9 @@ int main(int argc, char **argv, char **envp)
     if (!machine->use_virtcon) {
         default_virtcon = 0;
     }
+    if (!machine->use_sclp) {
+        default_sclp = 0;
+    }
     if (machine->no_floppy) {
         default_floppy = 0;
     }
@@ -3873,11 +3913,16 @@ int main(int argc, char **argv, char **envp)
             add_device_config(DEV_SERIAL, "mon:stdio");
         } else if (default_virtcon && default_monitor) {
             add_device_config(DEV_VIRTCON, "mon:stdio");
+        } else if (default_sclp && default_monitor) {
+            add_device_config(DEV_SCLP, "mon:stdio");
         } else {
             if (default_serial)
                 add_device_config(DEV_SERIAL, "stdio");
             if (default_virtcon)
                 add_device_config(DEV_VIRTCON, "stdio");
+            if (default_sclp) {
+                add_device_config(DEV_SCLP, "stdio");
+            }
             if (default_monitor)
                 monitor_parse("stdio", "readline");
         }
@@ -3890,6 +3935,9 @@ int main(int argc, char **argv, char **envp)
             monitor_parse("vc:80Cx24C", "readline");
         if (default_virtcon)
             add_device_config(DEV_VIRTCON, "vc:80Cx24C");
+        if (default_sclp) {
+            add_device_config(DEV_SCLP, "vc:80Cx24C");
+        }
     }
 
     socket_init();
@@ -4060,6 +4108,9 @@ int main(int argc, char **argv, char **envp)
         exit(1);
     if (foreach_device_config(DEV_VIRTCON, virtcon_parse) < 0)
         exit(1);
+    if (foreach_device_config(DEV_SCLP, sclp_parse) < 0) {
+        exit(1);
+    }
     if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0)
         exit(1);
 
commit f8bb056564ed719b2fa5e05028bc70aeb0cc5c6c
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 28 12:49:26 2013 +0100

    target-i386: kvm: prevent buffer overflow if -cpu foo, [x]level is too big
    
    Stack corruption may occur if too big 'level' or 'xlevel' values passed
    on command line with KVM enabled, due to limited size of cpuid_data
    in kvm_arch_init_vcpu().
    
    reproduces with:
     qemu -enable-kvm -cpu qemu64,level=4294967295
    or
     qemu -enable-kvm -cpu qemu64,xlevel=4294967295
    
    Check if there is space in cpuid_data before passing it to cpu_x86_cpuid()
    or abort() if there is not space.
    
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Andreas Faerber <afaerber at suse.de>
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Signed-off-by: Gleb Natapov <gleb at redhat.com>

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3acff40..4ecb728 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -411,11 +411,12 @@ static void cpu_update_state(void *opaque, int running, RunState state)
     }
 }
 
+#define KVM_MAX_CPUID_ENTRIES  100
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     struct {
         struct kvm_cpuid2 cpuid;
-        struct kvm_cpuid_entry2 entries[100];
+        struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
     } QEMU_PACKED cpuid_data;
     X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
@@ -502,6 +503,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
 
     for (i = 0; i <= limit; i++) {
+        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+            fprintf(stderr, "unsupported level value: 0x%x\n", limit);
+            abort();
+        }
         c = &cpuid_data.entries[cpuid_i++];
 
         switch (i) {
@@ -516,6 +521,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
             times = c->eax & 0xff;
 
             for (j = 1; j < times; ++j) {
+                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                    fprintf(stderr, "cpuid_data is full, no space for "
+                            "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
+                    abort();
+                }
                 c = &cpuid_data.entries[cpuid_i++];
                 c->function = i;
                 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -544,6 +554,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
                 if (i == 0xd && c->eax == 0) {
                     continue;
                 }
+                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                    fprintf(stderr, "cpuid_data is full, no space for "
+                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+                    abort();
+                }
                 c = &cpuid_data.entries[cpuid_i++];
             }
             break;
@@ -557,6 +572,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
     cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
 
     for (i = 0x80000000; i <= limit; i++) {
+        if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+            fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
+            abort();
+        }
         c = &cpuid_data.entries[cpuid_i++];
 
         c->function = i;
@@ -569,6 +588,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
         cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
 
         for (i = 0xC0000000; i <= limit; i++) {
+            if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
+                abort();
+            }
             c = &cpuid_data.entries[cpuid_i++];
 
             c->function = i;
commit ec9466ff2e50213c8318ffdd7003f345278ab795
Merge: 503cb22 290adf3
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Jan 28 14:48:03 2013 -0600

    Merge remote-tracking branch 'afaerber/qom-cpu' into staging
    
    * afaerber/qom-cpu: (37 commits)
      kvm: Pass CPUState to kvm_on_sigbus_vcpu()
      cpu: Unconditionalize CPUState fields
      target-m68k: Use type_register() instead of type_register_static()
      target-unicore32: Use type_register() instead of type_register_static()
      target-openrisc: Use type_register() instead of type_register_static()
      target-unicore32: Catch attempt to instantiate abstract type in cpu_init()
      target-openrisc: Catch attempt to instantiate abstract type in cpu_init()
      target-m68k: Catch attempt to instantiate abstract type in cpu_init()
      target-arm: Catch attempt to instantiate abstract type in cpu_init()
      target-alpha: Catch attempt to instantiate abstract type in cpu_init()
      qom: Introduce object_class_is_abstract()
      target-unicore32: Detect attempt to instantiate non-CPU type in cpu_init()
      target-openrisc: Detect attempt to instantiate non-CPU type in cpu_init()
      target-m68k: Detect attempt to instantiate non-CPU type in cpu_init()
      target-alpha: Detect attempt to instantiate non-CPU type in cpu_init()
      target-arm: Detect attempt to instantiate non-CPU type in cpu_init()
      cpu: Add model resolution support to CPUClass
      target-i386: Remove setting tsc-frequency from x86_def_t
      target-i386: Set custom features/properties without intermediate x86_def_t
      target-i386: Remove vendor_override field from CPUX86State
      ...
    
    Conflicts:
    	tests/Makefile
    
    Resolved simple conflict caused by lack of context in Makefile
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --cc tests/Makefile
index a77f26a,804ce42..c681ceb
--- a/tests/Makefile
+++ b/tests/Makefile
@@@ -45,8 -45,9 +45,11 @@@ gcov-files-test-aio-$(CONFIG_WIN32) = a
  gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
  check-unit-y += tests/test-thread-pool$(EXESUF)
  gcov-files-test-thread-pool-y = thread-pool.c
 +gcov-files-test-hbitmap-y = util/hbitmap.c
 +check-unit-y += tests/test-hbitmap$(EXESUF)
+ check-unit-y += tests/test-x86-cpuid$(EXESUF)
+ # all code tested by test-x86-cpuid is inside topology.h
+ gcov-files-test-x86-cpuid-y =
  
  check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
  
@@@ -90,7 -94,7 +96,8 @@@ tests/test-coroutine$(EXESUF): tests/te
  tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
  tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
  tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
 +tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
+ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
  
  tests/test-qapi-types.c tests/test-qapi-types.h :\
  $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
commit 503cb22e055dcf477f9147fa1a3b8ae17c86c9b0
Merge: 6cebf7a 67bec53
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Jan 28 14:46:45 2013 -0600

    Merge remote-tracking branch 'kwolf/for-anthony' into staging
    
    # By Paolo Bonzini (14) and others
    # Via Kevin Wolf
    * kwolf/for-anthony: (24 commits)
      ide: Add fall through annotations
      block: Create proper size file for disk mirror
      ahci: Add migration support
      ahci: Change data types in preparation for migration
      ahci: Remove unused AHCIDevice fields
      hbitmap: add assertion on hbitmap_iter_init
      mirror: do nothing on zero-sized disk
      block/vdi: Check for bad signature
      block/vdi: Improved return values from vdi_open
      block/vdi: Improve debug output for signature
      block: Use error code EMEDIUMTYPE for wrong format in some block drivers
      block: Add special error code for wrong format
      mirror: support arbitrarily-sized iterations
      mirror: support more than one in-flight AIO operation
      mirror: add buf-size argument to drive-mirror
      mirror: switch mirror_iteration to AIO
      mirror: allow customizing the granularity
      block: allow customizing the granularity of the dirty bitmap
      block: return count of dirty sectors, not chunks
      mirror: perform COW if the cluster size is bigger than the granularity
      ...

commit 6cebf7afac9287f7bcaeb0d8fd64fd7b75e3fa2c
Merge: 6034fe7 49b6d72
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Jan 28 14:41:25 2013 -0600

    Merge remote-tracking branch 'luiz/queue/qmp' into staging
    
    # By Lei Li (3) and others
    # Via Luiz Capitulino
    * luiz/queue/qmp:
      QAPI: Introduce memchar-read QMP command
      QAPI: Introduce memchar-write QMP command
      qemu-char: Add new char backend CirMemCharDriver
      docs: document virtio-balloon stats
      balloon: re-enable balloon stats
      balloon: drop old stats code & API
      block: Monitor command commit neglects to report some errors

commit 10a2158f52796e5b2b7ce7991bde09a3c985a37b
Author: Markus Armbruster <armbru at redhat.com>
Date:   Wed Jan 16 18:15:09 2013 +0100

    qemu-ga: Plug leaks on qmp_guest_network_get_interfaces() error paths
    
    Spotted by Coverity.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 498f5ca..7a0202e 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -935,9 +935,11 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
                 error_setg_errno(errp, errno,
                                  "failed to get MAC address of %s",
                                  ifa->ifa_name);
+                close(sock);
                 goto error;
             }
 
+            close(sock);
             mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
 
             info->value->hardware_address =
@@ -947,20 +949,19 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
                                 (int) mac_addr[4], (int) mac_addr[5]);
 
             info->value->has_hardware_address = true;
-            close(sock);
         }
 
         if (ifa->ifa_addr &&
             ifa->ifa_addr->sa_family == AF_INET) {
             /* interface with IPv4 address */
-            address_item = g_malloc0(sizeof(*address_item));
-            address_item->value = g_malloc0(sizeof(*address_item->value));
             p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
             if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
                 error_setg_errno(errp, errno, "inet_ntop failed");
                 goto error;
             }
 
+            address_item = g_malloc0(sizeof(*address_item));
+            address_item->value = g_malloc0(sizeof(*address_item->value));
             address_item->value->ip_address = g_strdup(addr4);
             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
 
@@ -973,14 +974,14 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
         } else if (ifa->ifa_addr &&
                    ifa->ifa_addr->sa_family == AF_INET6) {
             /* interface with IPv6 address */
-            address_item = g_malloc0(sizeof(*address_item));
-            address_item->value = g_malloc0(sizeof(*address_item->value));
             p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
             if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
                 error_setg_errno(errp, errno, "inet_ntop failed");
                 goto error;
             }
 
+            address_item = g_malloc0(sizeof(*address_item));
+            address_item->value = g_malloc0(sizeof(*address_item->value));
             address_item->value->ip_address = g_strdup(addr6);
             address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
 
commit 6f6867493cc00974de594a509cee5a3be61c64aa
Author: Markus Armbruster <armbru at redhat.com>
Date:   Wed Jan 16 18:15:08 2013 +0100

    qemu-ga: Plug memory leak in guest_fsfreeze_cleanup()
    
    Neglects to free errors allocated by qmp_guest_fsfreeze_thaw().
    Spotted by Coverity.
    
    While there, drop the test whether return value is negative (it's
    never true), and improve logging.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0ad73f3..498f5ca 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -611,13 +611,14 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
 
 static void guest_fsfreeze_cleanup(void)
 {
-    int64_t ret;
     Error *err = NULL;
 
     if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
-        ret = qmp_guest_fsfreeze_thaw(&err);
-        if (ret < 0 || err) {
-            slog("failed to clean up frozen filesystems");
+        qmp_guest_fsfreeze_thaw(&err);
+        if (err) {
+            slog("failed to clean up frozen filesystems: %s",
+                 error_get_pretty(err));
+            error_free(err);
         }
     }
 }
commit 290adf38967787bd985a5ec67dc4717e83c29eaa
Author: Andreas Färber <afaerber at suse.de>
Date:   Thu Jan 17 09:30:27 2013 +0100

    kvm: Pass CPUState to kvm_on_sigbus_vcpu()
    
    Since commit 20d695a9254c1b086a456d3b79a3c311236643ba (kvm: Pass
    CPUState to kvm_arch_*) CPUArchState is no longer needed.
    
    This allows changing the qemu_kvm_eat_signals() argument as well.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Reviewed-by: Gleb Natapov <gleb at redhat.com>

diff --git a/cpus.c b/cpus.c
index a4390c3..41779eb 100644
--- a/cpus.c
+++ b/cpus.c
@@ -517,7 +517,7 @@ static void qemu_init_sigbus(void)
     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
 }
 
-static void qemu_kvm_eat_signals(CPUArchState *env)
+static void qemu_kvm_eat_signals(CPUState *cpu)
 {
     struct timespec ts = { 0, 0 };
     siginfo_t siginfo;
@@ -538,7 +538,7 @@ static void qemu_kvm_eat_signals(CPUArchState *env)
 
         switch (r) {
         case SIGBUS:
-            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
+            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                 sigbus_reraise();
             }
             break;
@@ -560,7 +560,7 @@ static void qemu_init_sigbus(void)
 {
 }
 
-static void qemu_kvm_eat_signals(CPUArchState *env)
+static void qemu_kvm_eat_signals(CPUState *cpu)
 {
 }
 #endif /* !CONFIG_LINUX */
@@ -727,7 +727,7 @@ static void qemu_kvm_wait_io_event(CPUArchState *env)
         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
     }
 
-    qemu_kvm_eat_signals(env);
+    qemu_kvm_eat_signals(cpu);
     qemu_wait_io_event_common(cpu);
 }
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 384ee66..6e6dfb3 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -159,7 +159,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap);
 int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset);
 #endif
 
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr);
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
 int kvm_on_sigbus(int code, void *addr);
 
 /* internal API */
diff --git a/kvm-all.c b/kvm-all.c
index 363a358..04ec2d5 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -2026,9 +2026,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
     return 0;
 }
 
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
     return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
 }
 
diff --git a/kvm-stub.c b/kvm-stub.c
index 47f8dca..760aadc 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -112,7 +112,7 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint
     return -ENOSYS;
 }
 
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
     return 1;
 }
commit c03c520d508ba8b3a384f9849700987df8e4c328
Author: Andreas Färber <afaerber at suse.de>
Date:   Fri Jan 25 16:11:42 2013 +0100

    cpu: Unconditionalize CPUState fields
    
    Commits fc8c5b8c41ee5ba69d7a2be63b02a08c7b0b155b (Makefile.user: Define
    CONFIG_USER_ONLY for libuser/) and
    dd83b06ae61cfa2dc4381ab49f365bd0995fc930 (qom: Introduce CPU class)
    specifically prepared the qom/cpu.c file to be compiled differently for
    softmmu and *-user. This broke as part of build system refactorings
    while CPU patches were in flight, adding conditional fields
    kvm_fd (8737c51c0444f832c4e97d7eb7540eae457e08e4) and
    kvm_vcpu_dirty (20d695a9254c1b086a456d3b79a3c311236643ba) for softmmu.
    
    linux-user and bsd-user would therefore get a CPUState type with
    instance_size ~8 bytes longer than expected.
    Fix this by unconditionally having the fields in CPUState.
    
    In practice, target-specific CPU types' instance_size would compensate
    for this, and upstream qom/cpu.c does not yet touch any affected field.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 8097692..46f2247 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -93,10 +93,8 @@ struct CPUState {
     bool stop;
     bool stopped;
 
-#if !defined(CONFIG_USER_ONLY)
     int kvm_fd;
     bool kvm_vcpu_dirty;
-#endif
     struct KVMState *kvm_state;
     struct kvm_run *kvm_run;
 
commit 2dddbc2123681f0cc37a891fa61d97a88d5e641c
Author: Andreas Färber <afaerber at suse.de>
Date:   Sun Jan 27 19:31:00 2013 +0100

    target-m68k: Use type_register() instead of type_register_static()
    
    According to its documentation, type_register_static()'s TypeInfo
    argument should exist for the lifetime of the type.
    Therefore use type_register() when registering the list of CPU subtypes.
    
    No functional change with the current implementation.
    
    Cf. 918fd0839eeafc83bd4984364321a947d29041fe for arm.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index e6df1ee..5c78031 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -162,7 +162,7 @@ static void register_cpu_type(const M68kCPUInfo *info)
         .instance_init = info->instance_init,
     };
 
-    type_register_static(&type_info);
+    type_register(&type_info);
 }
 
 static const TypeInfo m68k_cpu_type_info = {
commit 87fb5811e631e79c24adab1f62bee01987cf1606
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:01:00 2013 +0100

    target-unicore32: Use type_register() instead of type_register_static()
    
    According to its documentation, type_register_static()'s TypeInfo
    argument should exist for the lifetime of the type.
    Therefore use type_register() when registering the list of CPU subtypes.
    
    No functional change with the current implementation.
    
    Cf. 918fd0839eeafc83bd4984364321a947d29041fe for arm.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index 6735b25..c120440 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -111,7 +111,7 @@ static void uc32_register_cpu_type(const UniCore32CPUInfo *info)
         .instance_init = info->instance_init,
     };
 
-    type_register_static(&type_info);
+    type_register(&type_info);
 }
 
 static const TypeInfo uc32_cpu_type_info = {
commit a1ebd6ce3396954185bda6e94ada60c583f6cbea
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 11:10:14 2013 +0100

    target-openrisc: Use type_register() instead of type_register_static()
    
    According to its documentation, type_register_static()'s TypeInfo
    argument should exist for the lifetime of the type.
    Therefore use type_register() when registering the list of CPU subtypes.
    
    No functional change with the current implementation.
    
    Cf. 918fd0839eeafc83bd4984364321a947d29041fe for arm.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index adc03ef..54876d9 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -151,7 +151,7 @@ static void cpu_register(const OpenRISCCPUInfo *info)
         .class_size = sizeof(OpenRISCCPUClass),
     };
 
-    type_register_static(&type_info);
+    type_register(&type_info);
 }
 
 static const TypeInfo openrisc_cpu_type_info = {
commit 4933908ac5974252c1830d69e9493fa79c5ea606
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:41:38 2013 +0100

    target-unicore32: Catch attempt to instantiate abstract type in cpu_init()
    
    Fixes -cpu unicore32-cpu asserting.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index 9239d49..6735b25 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -31,7 +31,8 @@ static ObjectClass *uc32_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (oc != NULL && !object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU)) {
+    if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU) ||
+                       object_class_is_abstract(oc))) {
         oc = NULL;
     }
     return oc;
commit c432b7840cfbc35fc0d097428d0a2f2a94983360
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:39:38 2013 +0100

    target-openrisc: Catch attempt to instantiate abstract type in cpu_init()
    
    There is no abstract OpenRISCCPU yet, but that seems a bug of its own.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index e23100f..adc03ef 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -98,7 +98,8 @@ static ObjectClass *openrisc_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (oc != NULL && !object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU)) {
+    if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU) ||
+                       object_class_is_abstract(oc))) {
         return NULL;
     }
     return oc;
commit cae85065a44b731467dc6a5caee7cfc6d26d9ca3
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:36:31 2013 +0100

    target-m68k: Catch attempt to instantiate abstract type in cpu_init()
    
    This fixes -cpu m68k-cpu asserting.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index b231d9a..e6df1ee 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -64,7 +64,8 @@ static ObjectClass *m68k_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (oc != NULL && object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL) {
+    if (oc != NULL && (object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL ||
+                       object_class_is_abstract(oc))) {
         return NULL;
     }
     return oc;
commit 245fb54db5f8c88c9d73e037178bc3ca1f9a4bb6
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:32:49 2013 +0100

    target-arm: Catch attempt to instantiate abstract type in cpu_init()
    
    This fixes -cpu arm-cpu asserting.
    
    Cc: qemu-stable at nongnu.org
    Acked-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 57126b6..d1a4c82 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -210,7 +210,8 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU)) {
+    if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) ||
+        object_class_is_abstract(oc)) {
         return NULL;
     }
     return oc;
commit a120c287086e0b03a57f1f4ac7d7aa73fe3d1fe7
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:28:22 2013 +0100

    target-alpha: Catch attempt to instantiate abstract type in cpu_init()
    
    This fixes -cpu alpha-cpu asserting.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 0d6975e..0ad69f0 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -96,14 +96,15 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL) {
+    if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL &&
+        !object_class_is_abstract(oc)) {
         return oc;
     }
 
     for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) {
         if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) {
             oc = object_class_by_name(alpha_cpu_aliases[i].typename);
-            assert(oc != NULL);
+            assert(oc != NULL && !object_class_is_abstract(oc));
             return oc;
         }
     }
@@ -111,6 +112,9 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
     typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model);
     oc = object_class_by_name(typename);
     g_free(typename);
+    if (oc != NULL && object_class_is_abstract(oc)) {
+        oc = NULL;
+    }
     return oc;
 }
 
commit 178623789465287624c48f7ef12d0ab83a1dc380
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:20:18 2013 +0100

    qom: Introduce object_class_is_abstract()
    
    This lets a caller check if an ObjectClass as returned by, e.g.,
    object_class_by_name() is instantiatable.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Cc: Anthony Liguori <anthony at codemonkey.ws>

diff --git a/include/qom/object.h b/include/qom/object.h
index 8e16ea8..48e80ba 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -691,6 +691,14 @@ ObjectClass *object_class_get_parent(ObjectClass *klass);
 const char *object_class_get_name(ObjectClass *klass);
 
 /**
+ * object_class_is_abstract:
+ * @klass: The class to obtain the abstractness for.
+ *
+ * Returns: %true if @klass is abstract, %false otherwise.
+ */
+bool object_class_is_abstract(ObjectClass *klass);
+
+/**
  * object_class_by_name:
  * @typename: The QOM typename to obtain the class for.
  *
diff --git a/qom/object.c b/qom/object.c
index 03e6f24..e200282 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -501,6 +501,11 @@ ObjectClass *object_get_class(Object *obj)
     return obj->class;
 }
 
+bool object_class_is_abstract(ObjectClass *klass)
+{
+    return klass->type->abstract;
+}
+
 const char *object_class_get_name(ObjectClass *klass)
 {
     return klass->type->name;
commit d89e12188d50f7f8a894027789f32fa7ba6226ad
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 12:07:17 2013 +0100

    target-unicore32: Detect attempt to instantiate non-CPU type in cpu_init()
    
    Consolidate model checking into a new uc32_cpu_class_by_name().
    
    If the name matches an existing type, also check whether that type is
    actually (a sub-type of) TYPE_UNICORE32_CPU.
    
    This fixes, e.g., -cpu puv3_dma asserting.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index 884c101..9239d49 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -22,6 +22,21 @@ static inline void set_feature(CPUUniCore32State *env, int feature)
 
 /* CPU models */
 
+static ObjectClass *uc32_cpu_class_by_name(const char *cpu_model)
+{
+    ObjectClass *oc;
+
+    if (cpu_model == NULL) {
+        return NULL;
+    }
+
+    oc = object_class_by_name(cpu_model);
+    if (oc != NULL && !object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU)) {
+        oc = NULL;
+    }
+    return oc;
+}
+
 typedef struct UniCore32CPUInfo {
     const char *name;
     void (*instance_init)(Object *obj);
@@ -80,6 +95,13 @@ static void uc32_cpu_initfn(Object *obj)
     tlb_flush(env, 1);
 }
 
+static void uc32_cpu_class_init(ObjectClass *oc, void *data)
+{
+    CPUClass *cc = CPU_CLASS(oc);
+
+    cc->class_by_name = uc32_cpu_class_by_name;
+}
+
 static void uc32_register_cpu_type(const UniCore32CPUInfo *info)
 {
     TypeInfo type_info = {
@@ -98,6 +120,7 @@ static const TypeInfo uc32_cpu_type_info = {
     .instance_init = uc32_cpu_initfn,
     .abstract = true,
     .class_size = sizeof(UniCore32CPUClass),
+    .class_init = uc32_cpu_class_init,
 };
 
 static void uc32_cpu_register_types(void)
diff --git a/target-unicore32/helper.c b/target-unicore32/helper.c
index 5359538..183b5b3 100644
--- a/target-unicore32/helper.c
+++ b/target-unicore32/helper.c
@@ -29,12 +29,14 @@ CPUUniCore32State *uc32_cpu_init(const char *cpu_model)
 {
     UniCore32CPU *cpu;
     CPUUniCore32State *env;
+    ObjectClass *oc;
     static int inited = 1;
 
-    if (object_class_by_name(cpu_model) == NULL) {
+    oc = cpu_class_by_name(TYPE_UNICORE32_CPU, cpu_model);
+    if (oc == NULL) {
         return NULL;
     }
-    cpu = UNICORE32_CPU(object_new(cpu_model));
+    cpu = UNICORE32_CPU(object_new(object_class_get_name(oc)));
     env = &cpu->env;
 
     if (inited) {
commit 8d5ce2e5643f4055b67ea012d91e812f1e607a83
Author: Andreas Färber <andreas.faerber at web.de>
Date:   Wed Jan 16 15:45:34 2013 +0100

    prep_pci: Convert to QOM realizefn
    
    SysBusDeviceClass' initfn merely calls SysBusDeviceClass::init, so we
    can already hook up our own realizefn overwriting this behavior.
    
    A symmetric unrealizefn is not necessary, knowing that the child's
    unrealizefn is still a no-op, too. This avoids ripping it out again when
    recursive realization at the DeviceState level is implemented.
    
    Signed-off-by: Andreas Färber <andreas.faerber at web.de>

diff --git a/hw/prep_pci.c b/hw/prep_pci.c
index e1420ca..52ee5d9 100644
--- a/hw/prep_pci.c
+++ b/hw/prep_pci.c
@@ -111,8 +111,9 @@ static void prep_set_irq(void *opaque, int irq_num, int level)
     qemu_set_irq(pic[irq_num] , level);
 }
 
-static int raven_pcihost_init(SysBusDevice *dev)
+static void raven_pcihost_realizefn(DeviceState *d, Error **errp)
 {
+    SysBusDevice *dev = SYS_BUS_DEVICE(d);
     PCIHostState *h = PCI_HOST_BRIDGE(dev);
     PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev);
     MemoryRegion *address_space_mem = get_system_memory();
@@ -141,7 +142,7 @@ static int raven_pcihost_init(SysBusDevice *dev)
     memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->intack);
 
     /* TODO Remove once realize propagates to child devices. */
-    return qdev_init(DEVICE(&s->pci_dev));
+    object_property_set_bool(OBJECT(&s->pci_dev), true, "realized", errp);
 }
 
 static void raven_pcihost_initfn(Object *obj)
@@ -207,10 +208,9 @@ static const TypeInfo raven_info = {
 
 static void raven_pcihost_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    k->init = raven_pcihost_init;
+    dc->realize = raven_pcihost_realizefn;
     dc->fw_name = "pci";
     dc->no_user = 1;
 }
commit 98aca3c8e8778745cdd0670a792a41314115afa5
Author: Andreas Färber <andreas.faerber at web.de>
Date:   Sat May 26 19:14:52 2012 +0200

    prep_pci: Create PCIBus and PCIDevice in-place
    
    Prepares for QOM realizefn by removing object creation from qdev initfn.
    
    Signed-off-by: Andreas Färber <andreas.faerber at web.de>

diff --git a/hw/prep_pci.c b/hw/prep_pci.c
index 212a2ac..e1420ca 100644
--- a/hw/prep_pci.c
+++ b/hw/prep_pci.c
@@ -2,6 +2,7 @@
  * QEMU PREP PCI host
  *
  * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2011-2013 Andreas Färber
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -24,12 +25,21 @@
 
 #include "hw.h"
 #include "pci/pci.h"
+#include "pci/pci_bus.h"
 #include "pci/pci_host.h"
 #include "pc.h"
 #include "exec/address-spaces.h"
 
+#define TYPE_RAVEN_PCI_DEVICE "raven"
 #define TYPE_RAVEN_PCI_HOST_BRIDGE "raven-pcihost"
 
+#define RAVEN_PCI_DEVICE(obj) \
+    OBJECT_CHECK(RavenPCIState, (obj), TYPE_RAVEN_PCI_DEVICE)
+
+typedef struct RavenPCIState {
+    PCIDevice dev;
+} RavenPCIState;
+
 #define RAVEN_PCI_HOST_BRIDGE(obj) \
     OBJECT_CHECK(PREPPCIState, (obj), TYPE_RAVEN_PCI_HOST_BRIDGE)
 
@@ -38,12 +48,10 @@ typedef struct PRePPCIState {
 
     MemoryRegion intack;
     qemu_irq irq[4];
+    PCIBus pci_bus;
+    RavenPCIState pci_dev;
 } PREPPCIState;
 
-typedef struct RavenPCIState {
-    PCIDevice dev;
-} RavenPCIState;
-
 static inline uint32_t PPC_PCIIO_config(hwaddr addr)
 {
     int i;
@@ -108,18 +116,13 @@ static int raven_pcihost_init(SysBusDevice *dev)
     PCIHostState *h = PCI_HOST_BRIDGE(dev);
     PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(dev);
     MemoryRegion *address_space_mem = get_system_memory();
-    MemoryRegion *address_space_io = get_system_io();
-    PCIBus *bus;
     int i;
 
     for (i = 0; i < 4; i++) {
         sysbus_init_irq(dev, &s->irq[i]);
     }
 
-    bus = pci_register_bus(DEVICE(dev), NULL,
-                           prep_set_irq, prep_map_irq, s->irq,
-                           address_space_mem, address_space_io, 0, 4);
-    h->bus = bus;
+    pci_bus_irqs(&s->pci_bus, prep_set_irq, prep_map_irq, s->irq, 4);
 
     memory_region_init_io(&h->conf_mem, &pci_host_conf_be_ops, s,
                           "pci-conf-idx", 1);
@@ -136,9 +139,29 @@ static int raven_pcihost_init(SysBusDevice *dev)
 
     memory_region_init_io(&s->intack, &PPC_intack_ops, s, "pci-intack", 1);
     memory_region_add_subregion(address_space_mem, 0xbffffff0, &s->intack);
-    pci_create_simple(bus, 0, "raven");
 
-    return 0;
+    /* TODO Remove once realize propagates to child devices. */
+    return qdev_init(DEVICE(&s->pci_dev));
+}
+
+static void raven_pcihost_initfn(Object *obj)
+{
+    PCIHostState *h = PCI_HOST_BRIDGE(obj);
+    PREPPCIState *s = RAVEN_PCI_HOST_BRIDGE(obj);
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *address_space_io = get_system_io();
+    DeviceState *pci_dev;
+
+    pci_bus_new_inplace(&s->pci_bus, DEVICE(obj), NULL,
+                        address_space_mem, address_space_io, 0);
+    h->bus = &s->pci_bus;
+
+    object_initialize(&s->pci_dev, TYPE_RAVEN_PCI_DEVICE);
+    pci_dev = DEVICE(&s->pci_dev);
+    qdev_set_parent_bus(pci_dev, BUS(&s->pci_bus));
+    object_property_set_int(OBJECT(&s->pci_dev), PCI_DEVFN(0, 0), "addr",
+                            NULL);
+    qdev_prop_set_bit(pci_dev, "multifunction", false);
 }
 
 static int raven_init(PCIDevice *d)
@@ -176,7 +199,7 @@ static void raven_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo raven_info = {
-    .name = "raven",
+    .name = TYPE_RAVEN_PCI_DEVICE,
     .parent = TYPE_PCI_DEVICE,
     .instance_size = sizeof(RavenPCIState),
     .class_init = raven_class_init,
@@ -196,6 +219,7 @@ static const TypeInfo raven_pcihost_info = {
     .name = TYPE_RAVEN_PCI_HOST_BRIDGE,
     .parent = TYPE_PCI_HOST_BRIDGE,
     .instance_size = sizeof(PREPPCIState),
+    .instance_init = raven_pcihost_initfn,
     .class_init = raven_pcihost_class_init,
 };
 
commit bd039ce0094f3724a87a193c846ee8468ce652b0
Author: Andreas Färber <afaerber at suse.de>
Date:   Wed Jan 23 11:17:14 2013 +0100

    target-openrisc: Detect attempt to instantiate non-CPU type in cpu_init()
    
    Consolidate model checking into a new openrisc_cpu_class_by_name().
    
    If the name matches an existing type, also check whether that type is
    actually (a sub-type of) TYPE_OPENRISC_CPU.
    
    This fixes, e.g., -cpu open_eth asserting.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index 7a55112..e23100f 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -88,6 +88,22 @@ static void openrisc_cpu_initfn(Object *obj)
 }
 
 /* CPU models */
+
+static ObjectClass *openrisc_cpu_class_by_name(const char *cpu_model)
+{
+    ObjectClass *oc;
+
+    if (cpu_model == NULL) {
+        return NULL;
+    }
+
+    oc = object_class_by_name(cpu_model);
+    if (oc != NULL && !object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU)) {
+        return NULL;
+    }
+    return oc;
+}
+
 static void or1200_initfn(Object *obj)
 {
     OpenRISCCPU *cpu = OPENRISC_CPU(obj);
@@ -120,6 +136,8 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
 
     occ->parent_reset = cc->reset;
     cc->reset = openrisc_cpu_reset;
+
+    cc->class_by_name = openrisc_cpu_class_by_name;
 }
 
 static void cpu_register(const OpenRISCCPUInfo *info)
@@ -158,11 +176,13 @@ static void openrisc_cpu_register_types(void)
 OpenRISCCPU *cpu_openrisc_init(const char *cpu_model)
 {
     OpenRISCCPU *cpu;
+    ObjectClass *oc;
 
-    if (!object_class_by_name(cpu_model)) {
+    oc = openrisc_cpu_class_by_name(cpu_model);
+    if (oc == NULL) {
         return NULL;
     }
-    cpu = OPENRISC_CPU(object_new(cpu_model));
+    cpu = OPENRISC_CPU(object_new(object_class_get_name(oc)));
     cpu->env.cpu_model_str = cpu_model;
 
     openrisc_cpu_realize(OBJECT(cpu), NULL);
commit bc5b2da32ba54d991e8669b14a771afb3a67f408
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 17:50:15 2013 +0100

    target-m68k: Detect attempt to instantiate non-CPU type in cpu_init()
    
    Consolidate model checking into a new m68k_cpu_class_by_name().
    
    If the name matches an existing type, also check whether that type is
    (a sub-type of) TYPE_M68K_CPU.
    
    This fixes, e.g., -cpu ide-hd asserting.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index ce89674..b231d9a 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -55,6 +55,21 @@ static void m68k_cpu_reset(CPUState *s)
 
 /* CPU models */
 
+static ObjectClass *m68k_cpu_class_by_name(const char *cpu_model)
+{
+    ObjectClass *oc;
+
+    if (cpu_model == NULL) {
+        return NULL;
+    }
+
+    oc = object_class_by_name(cpu_model);
+    if (oc != NULL && object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL) {
+        return NULL;
+    }
+    return oc;
+}
+
 static void m5206_cpu_initfn(Object *obj)
 {
     M68kCPU *cpu = M68K_CPU(obj);
@@ -134,6 +149,8 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
 
     mcc->parent_reset = cc->reset;
     cc->reset = m68k_cpu_reset;
+
+    cc->class_by_name = m68k_cpu_class_by_name;
 }
 
 static void register_cpu_type(const M68kCPUInfo *info)
diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index 097fc78..f66e12b 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -97,12 +97,14 @@ CPUM68KState *cpu_m68k_init(const char *cpu_model)
 {
     M68kCPU *cpu;
     CPUM68KState *env;
+    ObjectClass *oc;
     static int inited;
 
-    if (object_class_by_name(cpu_model) == NULL) {
+    oc = cpu_class_by_name(TYPE_M68K_CPU, cpu_model);
+    if (oc == NULL) {
         return NULL;
     }
-    cpu = M68K_CPU(object_new(cpu_model));
+    cpu = M68K_CPU(object_new(object_class_get_name(oc)));
     env = &cpu->env;
 
     if (!inited) {
commit 0e44a02301b081d36e686e767694a770c25160a2
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 17:27:54 2013 +0100

    target-alpha: Detect attempt to instantiate non-CPU type in cpu_init()
    
    Check in alpha_cpu_class_by_name() whether the type found is actually
    (a sub-type of) TYPE_ALPHA_CPU.
    
    This fixes, e.g., -cpu typhoon-pcihost asserting.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 3ac0fde..0d6975e 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -96,7 +96,7 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
     }
 
     oc = object_class_by_name(cpu_model);
-    if (oc != NULL) {
+    if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL) {
         return oc;
     }
 
commit 5900d6b2d59875c9b11e4d8cead6d9ddaa9eb787
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 16:11:43 2013 +0100

    target-arm: Detect attempt to instantiate non-CPU type in cpu_init()
    
    Consolidate model checking into a new arm_cpu_class_by_name().
    
    If the name matches an existing type, also check whether that type is
    actually (a sub-type of) TYPE_ARM_CPU.
    
    This fixes, e.g., -cpu tmp105 asserting.
    
    Cc: qemu-stable <qemu-stable at nongnu.org>
    Acked-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 07588a1..57126b6 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -201,6 +201,21 @@ void arm_cpu_realize(ARMCPU *cpu)
 
 /* CPU models */
 
+static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
+{
+    ObjectClass *oc;
+
+    if (!cpu_model) {
+        return NULL;
+    }
+
+    oc = object_class_by_name(cpu_model);
+    if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU)) {
+        return NULL;
+    }
+    return oc;
+}
+
 static void arm926_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
@@ -766,6 +781,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
 
     acc->parent_reset = cc->reset;
     cc->reset = arm_cpu_reset;
+
+    cc->class_by_name = arm_cpu_class_by_name;
 }
 
 static void cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 37c34a1..7a10fdd 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1262,12 +1262,14 @@ ARMCPU *cpu_arm_init(const char *cpu_model)
 {
     ARMCPU *cpu;
     CPUARMState *env;
+    ObjectClass *oc;
     static int inited = 0;
 
-    if (!object_class_by_name(cpu_model)) {
+    oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
+    if (!oc) {
         return NULL;
     }
-    cpu = ARM_CPU(object_new(cpu_model));
+    cpu = ARM_CPU(object_new(object_class_get_name(oc)));
     env = &cpu->env;
     env->cpu_model_str = cpu_model;
     arm_cpu_realize(cpu);
commit 2b8c27549917b3e07fec5807dbd2b6528ceb4efa
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 18:26:21 2013 +0100

    cpu: Add model resolution support to CPUClass
    
    Introduce CPUClass::class_by_name and add a default implementation.
    Hook up the alpha and ppc implementations.
    
    Introduce a wrapper function cpu_class_by_name().
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 773caf9..8097692 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -40,6 +40,8 @@ typedef struct CPUState CPUState;
 
 /**
  * CPUClass:
+ * @class_by_name: Callback to map -cpu command line model name to an
+ * instantiatable CPU type.
  * @reset: Callback to reset the #CPUState to its initial state.
  *
  * Represents a CPU family or model.
@@ -49,6 +51,8 @@ typedef struct CPUClass {
     DeviceClass parent_class;
     /*< public >*/
 
+    ObjectClass *(*class_by_name)(const char *cpu_model);
+
     void (*reset)(CPUState *cpu);
 } CPUClass;
 
@@ -108,6 +112,17 @@ struct CPUState {
 void cpu_reset(CPUState *cpu);
 
 /**
+ * cpu_class_by_name:
+ * @typename: The CPU base type.
+ * @cpu_model: The model string without any parameters.
+ *
+ * Looks up a CPU #ObjectClass matching name @cpu_model.
+ *
+ * Returns: A #CPUClass or %NULL if no matching class is found.
+ */
+ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model);
+
+/**
  * qemu_cpu_has_work:
  * @cpu: The vCPU to check.
  *
diff --git a/qom/cpu.c b/qom/cpu.c
index 49e5134..8fb538b 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -34,11 +34,24 @@ static void cpu_common_reset(CPUState *cpu)
 {
 }
 
+ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model)
+{
+    CPUClass *cc = CPU_CLASS(object_class_by_name(typename));
+
+    return cc->class_by_name(cpu_model);
+}
+
+static ObjectClass *cpu_common_class_by_name(const char *cpu_model)
+{
+    return NULL;
+}
+
 static void cpu_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     CPUClass *k = CPU_CLASS(klass);
 
+    k->class_by_name = cpu_common_class_by_name;
     k->reset = cpu_common_reset;
     dc->no_user = 1;
 }
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 40e9809..3ac0fde 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -244,6 +244,13 @@ static void alpha_cpu_initfn(Object *obj)
     env->fen = 1;
 }
 
+static void alpha_cpu_class_init(ObjectClass *oc, void *data)
+{
+    CPUClass *cc = CPU_CLASS(oc);
+
+    cc->class_by_name = alpha_cpu_class_by_name;
+}
+
 static const TypeInfo alpha_cpu_type_info = {
     .name = TYPE_ALPHA_CPU,
     .parent = TYPE_CPU,
@@ -251,6 +258,7 @@ static const TypeInfo alpha_cpu_type_info = {
     .instance_init = alpha_cpu_initfn,
     .abstract = true,
     .class_size = sizeof(AlphaCPUClass),
+    .class_init = alpha_cpu_class_init,
 };
 
 static void alpha_cpu_register_types(void)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 4f767c9..e143af5 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10578,6 +10578,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
 
     pcc->parent_reset = cc->reset;
     cc->reset = ppc_cpu_reset;
+
+    cc->class_by_name = ppc_cpu_class_by_name;
 }
 
 static const TypeInfo ppc_cpu_type_info = {
commit 2c728dfef56d468a6a80b4dacdfb7109220d2546
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 21 15:06:39 2013 +0100

    target-i386: Remove setting tsc-frequency from x86_def_t
    
    Setting tsc-frequency from x86_def_t is a NOP because the default tsc_khz
    in x86_def_t is 0 and CPUX86State.tsc_khz is also initialized to 0
    by default. So there is no need to overwrite tsc_khz with the default 0
    because the field was already initialized to 0.
    
    Custom tsc-frequency setting is not affected due to it being set
    without using x86_def_t.
    
    Field tsc_khz in x86_def_t becomes unused with this patch, so drop it
    as well.
    
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Reviewed-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 117b8b0..5c108e1 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -354,7 +354,6 @@ typedef struct x86_def_t {
     int family;
     int model;
     int stepping;
-    int tsc_khz;
     uint32_t features, ext_features, ext2_features, ext3_features;
     uint32_t kvm_features, svm_features;
     uint32_t xlevel;
@@ -1560,8 +1559,6 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
     env->cpuid_ext4_features = def->ext4_features;
     env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features;
     env->cpuid_xlevel2 = def->xlevel2;
-    object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
-                            "tsc-frequency", &error);
 
     object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error);
     if (error) {
commit a91987c25db38834091174681a6e1ffcbe582182
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 21 15:06:38 2013 +0100

    target-i386: Set custom features/properties without intermediate x86_def_t
    
    Move custom features parsing after built-in cpu_model defaults are set
    and set custom features directly on the CPU instance. That makes it
    possible to draw a clear distinction between built-in cpu model defaults,
    which eventually should go into class_init(), and extra property setting,
    which is done after defaults are set on the CPU instance.
    
    Impl. details:
     * use object_property_parse() property setter so it would be a mechanical
       change to switch to global properties later.
     * And after all current features/properties are converted into static
       properties, it will take a trivial patch to switch to global properties.
       Which will allow to:
       * get CPU instance initialized with all parameters passed on -cpu ...
         cmd. line from object_new() call.
       * call cpu_model/featurestr parsing only once before CPUs are created
       * open a road for removing CPUxxxState.cpu_model_str field, when other
         CPUs are similarly converted to subclasses and static properties.
     - re-factor error handling, to use Error instead of fprintf()s, since
       it is anyway passed in for property setter.
    
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index b11fe30..117b8b0 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1300,7 +1300,7 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
 
 /* Parse "+feature,-feature,feature=foo" CPU feature string
  */
-static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
+static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
 {
     char *featurestr; /* Single 'key=value" string being parsed */
     /* Features to be added */
@@ -1308,6 +1308,7 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
     /* Features to be removed */
     FeatureWordArray minus_features = { 0 };
     uint32_t numvalue;
+    CPUX86State *env = &cpu->env;
 
     featurestr = features ? strtok(features, ",") : NULL;
 
@@ -1320,77 +1321,57 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
         } else if ((val = strchr(featurestr, '='))) {
             *val = 0; val++;
             if (!strcmp(featurestr, "family")) {
-                char *err;
-                numvalue = strtoul(val, &err, 0);
-                if (!*val || *err || numvalue > 0xff + 0xf) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
-                }
-                x86_cpu_def->family = numvalue;
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "model")) {
-                char *err;
-                numvalue = strtoul(val, &err, 0);
-                if (!*val || *err || numvalue > 0xff) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
-                }
-                x86_cpu_def->model = numvalue;
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "stepping")) {
-                char *err;
-                numvalue = strtoul(val, &err, 0);
-                if (!*val || *err || numvalue > 0xf) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
-                }
-                x86_cpu_def->stepping = numvalue ;
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "level")) {
-                char *err;
-                numvalue = strtoul(val, &err, 0);
-                if (!*val || *err) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
-                }
-                x86_cpu_def->level = numvalue;
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "xlevel")) {
                 char *err;
+                char num[32];
+
                 numvalue = strtoul(val, &err, 0);
                 if (!*val || *err) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
+                    error_setg(errp, "bad numerical value %s\n", val);
+                    goto out;
                 }
                 if (numvalue < 0x80000000) {
                     fprintf(stderr, "xlevel value shall always be >= 0x80000000"
                             ", fixup will be removed in future versions\n");
                     numvalue += 0x80000000;
                 }
-                x86_cpu_def->xlevel = numvalue;
+                snprintf(num, sizeof(num), "%" PRIu32, numvalue);
+                object_property_parse(OBJECT(cpu), num, featurestr, errp);
             } else if (!strcmp(featurestr, "vendor")) {
-                pstrcpy(x86_cpu_def->vendor, sizeof(x86_cpu_def->vendor), val);
+                object_property_parse(OBJECT(cpu), val, featurestr, errp);
             } else if (!strcmp(featurestr, "model_id")) {
-                pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
-                        val);
+                object_property_parse(OBJECT(cpu), val, "model-id", errp);
             } else if (!strcmp(featurestr, "tsc_freq")) {
                 int64_t tsc_freq;
                 char *err;
+                char num[32];
 
                 tsc_freq = strtosz_suffix_unit(val, &err,
                                                STRTOSZ_DEFSUFFIX_B, 1000);
                 if (tsc_freq < 0 || *err) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
+                    error_setg(errp, "bad numerical value %s\n", val);
+                    goto out;
                 }
-                x86_cpu_def->tsc_khz = tsc_freq / 1000;
+                snprintf(num, sizeof(num), "%" PRId64, tsc_freq);
+                object_property_parse(OBJECT(cpu), num, "tsc-frequency", errp);
             } else if (!strcmp(featurestr, "hv_spinlocks")) {
                 char *err;
                 numvalue = strtoul(val, &err, 0);
                 if (!*val || *err) {
-                    fprintf(stderr, "bad numerical value %s\n", val);
-                    goto error;
+                    error_setg(errp, "bad numerical value %s\n", val);
+                    goto out;
                 }
                 hyperv_set_spinlock_retries(numvalue);
             } else {
-                fprintf(stderr, "unrecognized feature %s\n", featurestr);
-                goto error;
+                error_setg(errp, "unrecognized feature %s\n", featurestr);
+                goto out;
             }
         } else if (!strcmp(featurestr, "check")) {
             check_cpuid = 1;
@@ -1401,31 +1382,34 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
         } else if (!strcmp(featurestr, "hv_vapic")) {
             hyperv_enable_vapic_recommended(true);
         } else {
-            fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr);
-            goto error;
+            error_setg(errp, "feature string `%s' not in format (+feature|"
+                       "-feature|feature=xyz)\n", featurestr);
+            goto out;
+        }
+        if (error_is_set(errp)) {
+            goto out;
         }
         featurestr = strtok(NULL, ",");
     }
-    x86_cpu_def->features |= plus_features[FEAT_1_EDX];
-    x86_cpu_def->ext_features |= plus_features[FEAT_1_ECX];
-    x86_cpu_def->ext2_features |= plus_features[FEAT_8000_0001_EDX];
-    x86_cpu_def->ext3_features |= plus_features[FEAT_8000_0001_ECX];
-    x86_cpu_def->ext4_features |= plus_features[FEAT_C000_0001_EDX];
-    x86_cpu_def->kvm_features |= plus_features[FEAT_KVM];
-    x86_cpu_def->svm_features |= plus_features[FEAT_SVM];
-    x86_cpu_def->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX];
-    x86_cpu_def->features &= ~minus_features[FEAT_1_EDX];
-    x86_cpu_def->ext_features &= ~minus_features[FEAT_1_ECX];
-    x86_cpu_def->ext2_features &= ~minus_features[FEAT_8000_0001_EDX];
-    x86_cpu_def->ext3_features &= ~minus_features[FEAT_8000_0001_ECX];
-    x86_cpu_def->ext4_features &= ~minus_features[FEAT_C000_0001_EDX];
-    x86_cpu_def->kvm_features &= ~minus_features[FEAT_KVM];
-    x86_cpu_def->svm_features &= ~minus_features[FEAT_SVM];
-    x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX];
-    return 0;
+    env->cpuid_features |= plus_features[FEAT_1_EDX];
+    env->cpuid_ext_features |= plus_features[FEAT_1_ECX];
+    env->cpuid_ext2_features |= plus_features[FEAT_8000_0001_EDX];
+    env->cpuid_ext3_features |= plus_features[FEAT_8000_0001_ECX];
+    env->cpuid_ext4_features |= plus_features[FEAT_C000_0001_EDX];
+    env->cpuid_kvm_features |= plus_features[FEAT_KVM];
+    env->cpuid_svm_features |= plus_features[FEAT_SVM];
+    env->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX];
+    env->cpuid_features &= ~minus_features[FEAT_1_EDX];
+    env->cpuid_ext_features &= ~minus_features[FEAT_1_ECX];
+    env->cpuid_ext2_features &= ~minus_features[FEAT_8000_0001_EDX];
+    env->cpuid_ext3_features &= ~minus_features[FEAT_8000_0001_ECX];
+    env->cpuid_ext4_features &= ~minus_features[FEAT_C000_0001_EDX];
+    env->cpuid_kvm_features &= ~minus_features[FEAT_KVM];
+    env->cpuid_svm_features &= ~minus_features[FEAT_SVM];
+    env->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX];
 
-error:
-    return -1;
+out:
+    return;
 }
 
 /* generate a composite string into buf of all cpuid names in featureset
@@ -1561,10 +1545,6 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
     }
     def->ext_features |= CPUID_EXT_HYPERVISOR;
 
-    if (cpu_x86_parse_featurestr(def, features) < 0) {
-        error_setg(&error, "Invalid cpu_model string format: %s", cpu_model);
-        goto out;
-    }
     object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error);
     object_property_set_int(OBJECT(cpu), def->level, "level", &error);
     object_property_set_int(OBJECT(cpu), def->family, "family", &error);
@@ -1584,7 +1564,11 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
                             "tsc-frequency", &error);
 
     object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error);
+    if (error) {
+        goto out;
+    }
 
+    cpu_x86_parse_featurestr(cpu, features, &error);
 out:
     g_strfreev(model_pieces);
     if (error) {
commit 11acfdd5a1647895ff9094e7f93f3317224eb4d8
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 21 15:06:37 2013 +0100

    target-i386: Remove vendor_override field from CPUX86State
    
    Commit 8935499831312 makes cpuid return the host's vendor value to the
    guest instead of the built-in one by default if kvm_enabled() == true, and
    allows overriding this behavior if 'vendor' is specified on the -cpu
    command line.
    
    But every time guest calls cpuid to get 'vendor' value, host's value is
    read again and again in default case.
    
    It complicates semantics of vendor property and makes it harder to use.
    
    Instead of reading 'vendor' value from host every time cpuid[vendor] is
    called, override 'vendor' value only once in cpu_x86_find_by_name(), when
    built-in CPU model is found and if(kvm_enabled() == true).
    
    It provides the same default semantics
     if (kvm_enabled() == true)  vendor = host's vendor
     else vendor = built-in vendor
    
    and then later:
     if (custom vendor) vendor = custom vendor
    
    'vendor' value is overridden when user provides it on -cpu command line,
    and there is no need for vendor_override field anymore, remove it.
    
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 043a21d..b11fe30 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -359,7 +359,6 @@ typedef struct x86_def_t {
     uint32_t kvm_features, svm_features;
     uint32_t xlevel;
     char model_id[48];
-    int vendor_override;
     /* Store the results of Centaur's CPUID instructions */
     uint32_t ext4_features;
     uint32_t xlevel2;
@@ -927,7 +926,6 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def)
                 kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX);
 
     cpu_x86_fill_model_id(x86_cpu_def->model_id);
-    x86_cpu_def->vendor_override = 0;
 
     /* Call Centaur's CPUID instruction. */
     if (!strcmp(x86_cpu_def->vendor, CPUID_VENDOR_VIA)) {
@@ -1194,7 +1192,6 @@ static void x86_cpuid_set_vendor(Object *obj, const char *value,
         env->cpuid_vendor2 |= ((uint8_t)value[i + 4]) << (8 * i);
         env->cpuid_vendor3 |= ((uint8_t)value[i + 8]) << (8 * i);
     }
-    env->cpuid_vendor_override = 1;
 }
 
 static char *x86_cpuid_get_model_id(Object *obj, Error **errp)
@@ -1282,6 +1279,18 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
         def = &builtin_x86_defs[i];
         if (strcmp(name, def->name) == 0) {
             memcpy(x86_cpu_def, def, sizeof(*def));
+            /* sysenter isn't supported in compatibility mode on AMD,
+             * syscall isn't supported in compatibility mode on Intel.
+             * Normally we advertise the actual CPU vendor, but you can
+             * override this using the 'vendor' property if you want to use
+             * KVM's sysenter/syscall emulation in compatibility mode and
+             * when doing cross vendor migration
+             */
+            if (kvm_enabled()) {
+                uint32_t  ebx = 0, ecx = 0, edx = 0;
+                host_cpuid(0, 0, NULL, &ebx, &ecx, &edx);
+                x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx);
+            }
             return 0;
         }
     }
@@ -1357,7 +1366,6 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
                 x86_cpu_def->xlevel = numvalue;
             } else if (!strcmp(featurestr, "vendor")) {
                 pstrcpy(x86_cpu_def->vendor, sizeof(x86_cpu_def->vendor), val);
-                x86_cpu_def->vendor_override = 1;
             } else if (!strcmp(featurestr, "model_id")) {
                 pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
                         val);
@@ -1558,7 +1566,6 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
         goto out;
     }
     object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error);
-    env->cpuid_vendor_override = def->vendor_override;
     object_property_set_int(OBJECT(cpu), def->level, "level", &error);
     object_property_set_int(OBJECT(cpu), def->family, "family", &error);
     object_property_set_int(OBJECT(cpu), def->model, "model", &error);
@@ -1627,16 +1634,6 @@ static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx,
     *ebx = env->cpuid_vendor1;
     *edx = env->cpuid_vendor2;
     *ecx = env->cpuid_vendor3;
-
-    /* sysenter isn't supported on compatibility mode on AMD, syscall
-     * isn't supported in compatibility mode on Intel.
-     * Normally we advertise the actual cpu vendor, but you can override
-     * this if you want to use KVM's sysenter/syscall emulation
-     * in compatibility mode and when doing cross vendor migration
-     */
-    if (kvm_enabled() && ! env->cpuid_vendor_override) {
-        host_cpuid(0, 0, NULL, ebx, ecx, edx);
-    }
 }
 
 void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 3833e6f..62508dc 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -835,7 +835,6 @@ typedef struct CPUX86State {
     uint32_t cpuid_ext2_features;
     uint32_t cpuid_ext3_features;
     uint32_t cpuid_apic_id;
-    int cpuid_vendor_override;
     /* Store the results of Centaur's CPUID instructions */
     uint32_t cpuid_xlevel2;
     uint32_t cpuid_ext4_features;
commit 99b88a1708919934f4092f7b6dcc2cca9d4072e9
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 21 15:06:36 2013 +0100

    target-i386: Replace uint32_t vendor fields by vendor string in x86_def_t
    
    Vendor property setter takes string as vendor value but cpudefs
    use uint32_t vendor[123] fields to define vendor value. It makes it
    difficult to unify and use property setter for values from cpudefs.
    
    Simplify code by using vendor property setter, vendor[123] fields
    are converted into vendor[13] array to keep its value. And vendor
    property setter is used to access/set value on CPU.
    
     - Make for() cycle reusable for the next patch by adding
       x86_cpu_vendor_words2str()
    
    Intel's CPUID spec[1] says:
    "
    5.1.1 ...
    These registers contain the ASCII string: GenuineIntel
    ...
    "
    
    List[2] of known vendor values shows that they all are 12 ASCII
    characters long, padded where necessary with space.
    
    Current supported values are all ASCII characters packed in
    ebx, edx, ecx. So let's state that QEMU supports 12 printable ASCII
    characters packed in ebx, edx, ecx registers for cpuid(0) instruction.
    
    *1 - http://www.intel.com/Assets/PDF/appnote/241618.pdf
    *2 - http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
    
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index b75ea9f..043a21d 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -47,6 +47,18 @@
 #include "hw/apic_internal.h"
 #endif
 
+static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
+                                     uint32_t vendor2, uint32_t vendor3)
+{
+    int i;
+    for (i = 0; i < 4; i++) {
+        dst[i] = vendor1 >> (8 * i);
+        dst[i + 4] = vendor2 >> (8 * i);
+        dst[i + 8] = vendor3 >> (8 * i);
+    }
+    dst[CPUID_VENDOR_SZ] = '\0';
+}
+
 /* feature flags taken from "Intel Processor Identification and the CPUID
  * Instruction" and AMD's "CPUID Specification".  In cases of disagreement
  * between feature naming conventions, aliases may be added.
@@ -337,7 +349,8 @@ static void add_flagname_to_bitmaps(const char *flagname,
 typedef struct x86_def_t {
     const char *name;
     uint32_t level;
-    uint32_t vendor1, vendor2, vendor3;
+    /* vendor is zero-terminated, 12 character ASCII string */
+    char vendor[CPUID_VENDOR_SZ + 1];
     int family;
     int model;
     int stepping;
@@ -398,9 +411,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "qemu64",
         .level = 4,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 6,
         .model = 2,
         .stepping = 3,
@@ -417,9 +428,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "phenom",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 16,
         .model = 2,
         .stepping = 3,
@@ -445,9 +454,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "core2duo",
         .level = 10,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 15,
         .stepping = 11,
@@ -466,9 +473,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "kvm64",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 15,
         .model = 6,
         .stepping = 1,
@@ -492,9 +497,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "qemu32",
         .level = 4,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 3,
         .stepping = 3,
@@ -505,9 +508,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "kvm32",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 15,
         .model = 6,
         .stepping = 1,
@@ -522,9 +523,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "coreduo",
         .level = 10,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 14,
         .stepping = 8,
@@ -540,9 +539,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "486",
         .level = 1,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 4,
         .model = 0,
         .stepping = 0,
@@ -552,9 +549,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "pentium",
         .level = 1,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 5,
         .model = 4,
         .stepping = 3,
@@ -564,9 +559,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "pentium2",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 5,
         .stepping = 2,
@@ -576,9 +569,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "pentium3",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 7,
         .stepping = 3,
@@ -588,9 +579,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "athlon",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 6,
         .model = 2,
         .stepping = 3,
@@ -604,9 +593,7 @@ static x86_def_t builtin_x86_defs[] = {
         .name = "n270",
         /* original is on level 10 */
         .level = 5,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 28,
         .stepping = 2,
@@ -625,9 +612,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Conroe",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 2,
         .stepping = 3,
@@ -645,9 +630,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Penryn",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 2,
         .stepping = 3,
@@ -666,9 +649,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Nehalem",
         .level = 2,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 2,
         .stepping = 3,
@@ -687,9 +668,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Westmere",
         .level = 11,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 44,
         .stepping = 1,
@@ -709,9 +688,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "SandyBridge",
         .level = 0xd,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 42,
         .stepping = 1,
@@ -734,9 +711,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Haswell",
         .level = 0xd,
-        .vendor1 = CPUID_VENDOR_INTEL_1,
-        .vendor2 = CPUID_VENDOR_INTEL_2,
-        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .vendor = CPUID_VENDOR_INTEL,
         .family = 6,
         .model = 60,
         .stepping = 1,
@@ -764,9 +739,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Opteron_G1",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 15,
         .model = 6,
         .stepping = 1,
@@ -788,9 +761,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Opteron_G2",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 15,
         .model = 6,
         .stepping = 1,
@@ -814,9 +785,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Opteron_G3",
         .level = 5,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 15,
         .model = 6,
         .stepping = 1,
@@ -842,9 +811,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Opteron_G4",
         .level = 0xd,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 21,
         .model = 1,
         .stepping = 2,
@@ -874,9 +841,7 @@ static x86_def_t builtin_x86_defs[] = {
     {
         .name = "Opteron_G5",
         .level = 0xd,
-        .vendor1 = CPUID_VENDOR_AMD_1,
-        .vendor2 = CPUID_VENDOR_AMD_2,
-        .vendor3 = CPUID_VENDOR_AMD_3,
+        .vendor = CPUID_VENDOR_AMD,
         .family = 21,
         .model = 2,
         .stepping = 0,
@@ -937,9 +902,7 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def)
 
     x86_cpu_def->name = "host";
     host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
-    x86_cpu_def->vendor1 = ebx;
-    x86_cpu_def->vendor2 = edx;
-    x86_cpu_def->vendor3 = ecx;
+    x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx);
 
     host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
     x86_cpu_def->family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
@@ -967,9 +930,7 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def)
     x86_cpu_def->vendor_override = 0;
 
     /* Call Centaur's CPUID instruction. */
-    if (x86_cpu_def->vendor1 == CPUID_VENDOR_VIA_1 &&
-        x86_cpu_def->vendor2 == CPUID_VENDOR_VIA_2 &&
-        x86_cpu_def->vendor3 == CPUID_VENDOR_VIA_3) {
+    if (!strcmp(x86_cpu_def->vendor, CPUID_VENDOR_VIA)) {
         host_cpuid(0xC0000000, 0, &eax, &ebx, &ecx, &edx);
         eax = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
         if (eax >= 0xC0000001) {
@@ -1205,15 +1166,10 @@ static char *x86_cpuid_get_vendor(Object *obj, Error **errp)
     X86CPU *cpu = X86_CPU(obj);
     CPUX86State *env = &cpu->env;
     char *value;
-    int i;
 
     value = (char *)g_malloc(CPUID_VENDOR_SZ + 1);
-    for (i = 0; i < 4; i++) {
-        value[i    ] = env->cpuid_vendor1 >> (8 * i);
-        value[i + 4] = env->cpuid_vendor2 >> (8 * i);
-        value[i + 8] = env->cpuid_vendor3 >> (8 * i);
-    }
-    value[CPUID_VENDOR_SZ] = '\0';
+    x86_cpu_vendor_words2str(value, env->cpuid_vendor1, env->cpuid_vendor2,
+                             env->cpuid_vendor3);
     return value;
 }
 
@@ -1337,7 +1293,6 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
  */
 static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
 {
-    unsigned int i;
     char *featurestr; /* Single 'key=value" string being parsed */
     /* Features to be added */
     FeatureWordArray plus_features = { 0 };
@@ -1401,18 +1356,7 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
                 }
                 x86_cpu_def->xlevel = numvalue;
             } else if (!strcmp(featurestr, "vendor")) {
-                if (strlen(val) != 12) {
-                    fprintf(stderr, "vendor string must be 12 chars long\n");
-                    goto error;
-                }
-                x86_cpu_def->vendor1 = 0;
-                x86_cpu_def->vendor2 = 0;
-                x86_cpu_def->vendor3 = 0;
-                for(i = 0; i < 4; i++) {
-                    x86_cpu_def->vendor1 |= ((uint8_t)val[i    ]) << (8 * i);
-                    x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4]) << (8 * i);
-                    x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8]) << (8 * i);
-                }
+                pstrcpy(x86_cpu_def->vendor, sizeof(x86_cpu_def->vendor), val);
                 x86_cpu_def->vendor_override = 1;
             } else if (!strcmp(featurestr, "model_id")) {
                 pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
@@ -1613,10 +1557,7 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
         error_setg(&error, "Invalid cpu_model string format: %s", cpu_model);
         goto out;
     }
-    assert(def->vendor1);
-    env->cpuid_vendor1 = def->vendor1;
-    env->cpuid_vendor2 = def->vendor2;
-    env->cpuid_vendor3 = def->vendor3;
+    object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error);
     env->cpuid_vendor_override = def->vendor_override;
     object_property_set_int(OBJECT(cpu), def->level, "level", &error);
     object_property_set_int(OBJECT(cpu), def->family, "family", &error);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 27efe59..3833e6f 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -537,14 +537,14 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
 #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
 #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
+#define CPUID_VENDOR_INTEL "GenuineIntel"
 
 #define CPUID_VENDOR_AMD_1   0x68747541 /* "Auth" */
 #define CPUID_VENDOR_AMD_2   0x69746e65 /* "enti" */
 #define CPUID_VENDOR_AMD_3   0x444d4163 /* "cAMD" */
+#define CPUID_VENDOR_AMD   "AuthenticAMD"
 
-#define CPUID_VENDOR_VIA_1   0x746e6543 /* "Cent" */
-#define CPUID_VENDOR_VIA_2   0x48727561 /* "aurH" */
-#define CPUID_VENDOR_VIA_3   0x736c7561 /* "auls" */
+#define CPUID_VENDOR_VIA   "CentaurHauls"
 
 #define CPUID_MWAIT_IBE     (1 << 1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX     (1 << 0) /* enumeration supported */
commit 8ba8a69848f8b910207a4b57c68db9a7e92af578
Author: Igor Mammedov <imammedo at redhat.com>
Date:   Mon Jan 21 15:06:35 2013 +0100

    target-i386: Print deprecation warning if xlevel < 0x80000000
    
    Signed-off-by: Igor Mammedov <imammedo at redhat.com>
    Reviewed-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index df974d7..b75ea9f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1395,6 +1395,8 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
                     goto error;
                 }
                 if (numvalue < 0x80000000) {
+                    fprintf(stderr, "xlevel value shall always be >= 0x80000000"
+                            ", fixup will be removed in future versions\n");
                     numvalue += 0x80000000;
                 }
                 x86_cpu_def->xlevel = numvalue;
commit 7fc9b714eb4877ca83ce8e437ec93d34fca0eb3b
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 01:02:28 2013 +0100

    target-i386: Drop redundant list of CPU definitions
    
    It is no longer needed since dropping cpudef config file support.
    Cleaning this up removes knowledge about other models from x86_def_t,
    in preparation for reusing x86_def_t as intermediate step towards pure
    QOM X86CPU subclasses.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 37a4b03..df974d7 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -335,7 +335,6 @@ static void add_flagname_to_bitmaps(const char *flagname,
 }
 
 typedef struct x86_def_t {
-    struct x86_def_t *next;
     const char *name;
     uint32_t level;
     uint32_t vendor1, vendor2, vendor3;
@@ -393,11 +392,7 @@ typedef struct x86_def_t {
 #define TCG_SVM_FEATURES 0
 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
 
-/* maintains list of cpu model definitions
- */
-static x86_def_t *x86_defs = {NULL};
-
-/* built-in cpu model definitions (deprecated)
+/* built-in CPU model definitions
  */
 static x86_def_t builtin_x86_defs[] = {
     {
@@ -1317,6 +1312,7 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, void *opaque,
 static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
 {
     x86_def_t *def;
+    int i;
 
     if (name == NULL) {
         return -1;
@@ -1326,7 +1322,8 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
         return 0;
     }
 
-    for (def = x86_defs; def; def = def->next) {
+    for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
+        def = &builtin_x86_defs[i];
         if (strcmp(name, def->name) == 0) {
             memcpy(x86_cpu_def, def, sizeof(*def));
             return 0;
@@ -1512,8 +1509,10 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
     x86_def_t *def;
     char buf[256];
+    int i;
 
-    for (def = x86_defs; def; def = def->next) {
+    for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
+        def = &builtin_x86_defs[i];
         snprintf(buf, sizeof(buf), "%s", def->name);
         (*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
     }
@@ -1535,11 +1534,13 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
 {
     CpuDefinitionInfoList *cpu_list = NULL;
     x86_def_t *def;
+    int i;
 
-    for (def = x86_defs; def; def = def->next) {
+    for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
         CpuDefinitionInfoList *entry;
         CpuDefinitionInfo *info;
 
+        def = &builtin_x86_defs[i];
         info = g_malloc0(sizeof(*info));
         info->name = g_strdup(def->name);
 
@@ -1662,7 +1663,6 @@ void x86_cpudef_setup(void)
 
     for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) {
         x86_def_t *def = &builtin_x86_defs[i];
-        def->next = x86_defs;
 
         /* Look for specific "cpudef" models that */
         /* have the QEMU version in .model_id */
@@ -1675,8 +1675,6 @@ void x86_cpudef_setup(void)
                 break;
             }
         }
-
-        x86_defs = def;
     }
 }
 
commit 4bfe910d4728807e7d80de152a7ef33dd608033f
Author: Andreas Färber <afaerber at suse.de>
Date:   Mon Jan 21 01:00:24 2013 +0100

    target-i386: Simplify cpu_x86_find_by_name()
    
    Catch NULL name argument early to avoid repeated checks.
    Similarly, check for -cpu host early and untangle from iterating through
    model definitions. This prepares for introducing X86CPU subclasses.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index c5acaa7..37a4b03 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1318,20 +1318,22 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
 {
     x86_def_t *def;
 
-    for (def = x86_defs; def; def = def->next) {
-        if (name && !strcmp(name, def->name)) {
-            break;
-        }
+    if (name == NULL) {
+        return -1;
     }
-    if (kvm_enabled() && name && strcmp(name, "host") == 0) {
+    if (kvm_enabled() && strcmp(name, "host") == 0) {
         kvm_cpu_fill_host(x86_cpu_def);
-    } else if (!def) {
-        return -1;
-    } else {
-        memcpy(x86_cpu_def, def, sizeof(*def));
+        return 0;
     }
 
-    return 0;
+    for (def = x86_defs; def; def = def->next) {
+        if (strcmp(name, def->name) == 0) {
+            memcpy(x86_cpu_def, def, sizeof(*def));
+            return 0;
+        }
+    }
+
+    return -1;
 }
 
 /* Parse "+feature,-feature,feature=foo" CPU feature string
commit 8932cfdf7b95734c9b4a114b8ed0b4527af77ce7
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:09 2013 -0200

    pc: Generate APIC IDs according to CPU topology
    
    This keeps compatibility on machine-types pc-1.2 and older, and prints a
    warning in case the requested configuration won't get the correct
    topology.
    
    I couldn't think of a better way to warn about broken topology when in
    compat mode other than using error_report(). The warning message will
    probably be buried in a log file somewhere, but it's better than
    nothing.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index f9cfe78..b9a9b2e 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -233,11 +233,17 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
              initrd_filename, cpu_model, 1, 1);
 }
 
+static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
+{
+    enable_compat_apic_id_mode();
+    pc_init_pci(args);
+}
+
 /* PC machine init function for pc-0.14 to pc-1.2 */
 static void pc_init_pci_1_2(QEMUMachineInitArgs *args)
 {
     disable_kvm_pv_eoi();
-    pc_init_pci(args);
+    pc_init_pci_1_3(args);
 }
 
 /* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */
@@ -250,6 +256,7 @@ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
     const char *initrd_filename = args->initrd_filename;
     const char *boot_device = args->boot_device;
     disable_kvm_pv_eoi();
+    enable_compat_apic_id_mode();
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -268,6 +275,7 @@ static void pc_init_isa(QEMUMachineInitArgs *args)
     if (cpu_model == NULL)
         cpu_model = "486";
     disable_kvm_pv_eoi();
+    enable_compat_apic_id_mode();
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -306,7 +314,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
 static QEMUMachine pc_machine_v1_3 = {
     .name = "pc-1.3",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_3,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_3,
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 75dc973..c5acaa7 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -23,6 +23,8 @@
 
 #include "cpu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/cpus.h"
+#include "topology.h"
 
 #include "qemu/option.h"
 #include "qemu/config-file.h"
@@ -2194,6 +2196,14 @@ void x86_cpu_realize(Object *obj, Error **errp)
     cpu_reset(CPU(cpu));
 }
 
+/* Enables contiguous-apic-ID mode, for compatibility */
+static bool compat_apic_id_mode;
+
+void enable_compat_apic_id_mode(void)
+{
+    compat_apic_id_mode = true;
+}
+
 /* Calculates initial APIC ID for a specific CPU index
  *
  * Currently we need to be able to calculate the APIC ID from the CPU index
@@ -2203,10 +2213,20 @@ void x86_cpu_realize(Object *obj, Error **errp)
  */
 uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
 {
-    /* right now APIC ID == CPU index. this will eventually change to use
-     * the CPU topology configuration properly
-     */
-    return cpu_index;
+    uint32_t correct_id;
+    static bool warned;
+
+    correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index);
+    if (compat_apic_id_mode) {
+        if (cpu_index != correct_id && !warned) {
+            error_report("APIC IDs set in compatibility mode, "
+                         "CPU topology won't match the configuration");
+            warned = true;
+        }
+        return cpu_index;
+    } else {
+        return correct_id;
+    }
 }
 
 static void x86_cpu_initfn(Object *obj)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 9442f08..27efe59 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1256,5 +1256,6 @@ void disable_kvm_pv_eoi(void);
 const char *get_register_name_32(unsigned int reg);
 
 uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index);
+void enable_compat_apic_id_mode(void);
 
 #endif /* CPU_I386_H */
commit 247c9de13f9d54a94734875000a9faea8168c8ca
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Wed Jan 23 15:58:27 2013 -0200

    target-i386: Topology & APIC ID utility functions
    
    This introduces utility functions for the APIC ID calculation, based on:
      Intel® 64 Architecture Processor Topology Enumeration
      http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
    
    The code should be compatible with AMD's "Extended Method" described at:
      AMD CPUID Specification (Publication #25481)
      Section 3: Multiple Core Calculation
    as long as:
     - nr_threads is set to 1;
     - OFFSET_IDX is assumed to be 0;
     - CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to
       apicid_core_width().
    
    Unit tests included.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/topology.h b/target-i386/topology.h
new file mode 100644
index 0000000..24ed525
--- /dev/null
+++ b/target-i386/topology.h
@@ -0,0 +1,136 @@
+/*
+ *  x86 CPU topology data structures and functions
+ *
+ *  Copyright (c) 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TARGET_I386_TOPOLOGY_H
+#define TARGET_I386_TOPOLOGY_H
+
+/* This file implements the APIC-ID-based CPU topology enumeration logic,
+ * documented at the following document:
+ *   Intel® 64 Architecture Processor Topology Enumeration
+ *   http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
+ *
+ * This code should be compatible with AMD's "Extended Method" described at:
+ *   AMD CPUID Specification (Publication #25481)
+ *   Section 3: Multiple Core Calculation
+ * as long as:
+ *  nr_threads is set to 1;
+ *  OFFSET_IDX is assumed to be 0;
+ *  CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width().
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "qemu/bitops.h"
+
+/* APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support
+ */
+typedef uint32_t apic_id_t;
+
+/* Return the bit width needed for 'count' IDs
+ */
+static unsigned apicid_bitwidth_for_count(unsigned count)
+{
+    g_assert(count >= 1);
+    if (count == 1) {
+        return 0;
+    }
+    return bitops_flsl(count - 1) + 1;
+}
+
+/* Bit width of the SMT_ID (thread ID) field on the APIC ID
+ */
+static inline unsigned apicid_smt_width(unsigned nr_cores, unsigned nr_threads)
+{
+    return apicid_bitwidth_for_count(nr_threads);
+}
+
+/* Bit width of the Core_ID field
+ */
+static inline unsigned apicid_core_width(unsigned nr_cores, unsigned nr_threads)
+{
+    return apicid_bitwidth_for_count(nr_cores);
+}
+
+/* Bit offset of the Core_ID field
+ */
+static inline unsigned apicid_core_offset(unsigned nr_cores,
+                                          unsigned nr_threads)
+{
+    return apicid_smt_width(nr_cores, nr_threads);
+}
+
+/* Bit offset of the Pkg_ID (socket ID) field
+ */
+static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads)
+{
+    return apicid_core_offset(nr_cores, nr_threads) +
+           apicid_core_width(nr_cores, nr_threads);
+}
+
+/* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID
+ *
+ * The caller must make sure core_id < nr_cores and smt_id < nr_threads.
+ */
+static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores,
+                                             unsigned nr_threads,
+                                             unsigned pkg_id,
+                                             unsigned core_id,
+                                             unsigned smt_id)
+{
+    return (pkg_id  << apicid_pkg_offset(nr_cores, nr_threads)) |
+           (core_id << apicid_core_offset(nr_cores, nr_threads)) |
+           smt_id;
+}
+
+/* Calculate thread/core/package IDs for a specific topology,
+ * based on (contiguous) CPU index
+ */
+static inline void x86_topo_ids_from_idx(unsigned nr_cores,
+                                         unsigned nr_threads,
+                                         unsigned cpu_index,
+                                         unsigned *pkg_id,
+                                         unsigned *core_id,
+                                         unsigned *smt_id)
+{
+    unsigned core_index = cpu_index / nr_threads;
+    *smt_id = cpu_index % nr_threads;
+    *core_id = core_index % nr_cores;
+    *pkg_id = core_index / nr_cores;
+}
+
+/* Make APIC ID for the CPU 'cpu_index'
+ *
+ * 'cpu_index' is a sequential, contiguous ID for the CPU.
+ */
+static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores,
+                                                unsigned nr_threads,
+                                                unsigned cpu_index)
+{
+    unsigned pkg_id, core_id, smt_id;
+    x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index,
+                          &pkg_id, &core_id, &smt_id);
+    return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id);
+}
+
+#endif /* TARGET_I386_TOPOLOGY_H */
diff --git a/tests/.gitignore b/tests/.gitignore
index f9041f3..38c94ef 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -10,4 +10,5 @@ test-qmp-commands.h
 test-qmp-commands
 test-qmp-input-strict
 test-qmp-marshal.c
+test-x86-cpuid
 *-test
diff --git a/tests/Makefile b/tests/Makefile
index 442b286..804ce42 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -45,6 +45,9 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
+check-unit-y += tests/test-x86-cpuid$(EXESUF)
+# all code tested by test-x86-cpuid is inside topology.h
+gcov-files-test-x86-cpuid-y =
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -72,12 +75,15 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-coroutine.o tests/test-string-output-visitor.o \
 	tests/test-string-input-visitor.o tests/test-qmp-output-visitor.o \
 	tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \
-	tests/test-qmp-commands.o tests/test-visitor-serialization.o
+	tests/test-qmp-commands.o tests/test-visitor-serialization.o \
+	tests/test-x86-cpuid.o
 
 test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 
+tests/test-x86-cpuid.o: QEMU_INCLUDES += -I$(SRC_PATH)/target-i386
+
 tests/check-qint$(EXESUF): tests/check-qint.o libqemuutil.a
 tests/check-qstring$(EXESUF): tests/check-qstring.o libqemuutil.a
 tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a
@@ -88,6 +94,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
+tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/test-x86-cpuid.c b/tests/test-x86-cpuid.c
new file mode 100644
index 0000000..8d9f96a
--- /dev/null
+++ b/tests/test-x86-cpuid.c
@@ -0,0 +1,110 @@
+/*
+ *  Test code for x86 CPUID and Topology functions
+ *
+ *  Copyright (c) 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <glib.h>
+
+#include "topology.h"
+
+static void test_topo_bits(void)
+{
+    /* simple tests for 1 thread per core, 1 core per socket */
+    g_assert_cmpuint(apicid_smt_width(1, 1), ==, 0);
+    g_assert_cmpuint(apicid_core_width(1, 1), ==, 0);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 0), ==, 0);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 1), ==, 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 2), ==, 2);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 3), ==, 3);
+
+
+    /* Test field width calculation for multiple values
+     */
+    g_assert_cmpuint(apicid_smt_width(1, 2), ==, 1);
+    g_assert_cmpuint(apicid_smt_width(1, 3), ==, 2);
+    g_assert_cmpuint(apicid_smt_width(1, 4), ==, 2);
+
+    g_assert_cmpuint(apicid_smt_width(1, 14), ==, 4);
+    g_assert_cmpuint(apicid_smt_width(1, 15), ==, 4);
+    g_assert_cmpuint(apicid_smt_width(1, 16), ==, 4);
+    g_assert_cmpuint(apicid_smt_width(1, 17), ==, 5);
+
+
+    g_assert_cmpuint(apicid_core_width(30, 2), ==, 5);
+    g_assert_cmpuint(apicid_core_width(31, 2), ==, 5);
+    g_assert_cmpuint(apicid_core_width(32, 2), ==, 5);
+    g_assert_cmpuint(apicid_core_width(33, 2), ==, 6);
+
+
+    /* build a weird topology and see if IDs are calculated correctly
+     */
+
+    /* This will use 2 bits for thread ID and 3 bits for core ID
+     */
+    g_assert_cmpuint(apicid_smt_width(6, 3), ==, 2);
+    g_assert_cmpuint(apicid_core_width(6, 3), ==, 3);
+    g_assert_cmpuint(apicid_pkg_offset(6, 3), ==, 5);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 0), ==, 0);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1), ==, 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2), ==, 2);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 0), ==,
+                     (1 << 2) | 0);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 1), ==,
+                     (1 << 2) | 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 2), ==,
+                     (1 << 2) | 2);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 0), ==,
+                     (2 << 2) | 0);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 1), ==,
+                     (2 << 2) | 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 2), ==,
+                     (2 << 2) | 2);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 0), ==,
+                     (5 << 2) | 0);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 1), ==,
+                     (5 << 2) | 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 2), ==,
+                     (5 << 2) | 2);
+
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 0 * 3 + 0), ==,
+                     (1 << 5));
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 1 * 3 + 1), ==,
+                     (1 << 5) | (1 << 2) | 1);
+    g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 3 * 6 * 3 + 5 * 3 + 2), ==,
+                     (3 << 5) | (5 << 2) | 2);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/cpuid/topology/basic", test_topo_bits);
+
+    g_test_run();
+
+    return 0;
+}
commit 1d934e89793d2828e04af93abd181e5ed5349ef4
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Wed Jan 23 15:51:18 2013 -0200

    pc: Set fw_cfg data based on APIC ID calculation
    
    This changes FW_CFG_MAX_CPUS and FW_CFG_NUMA to use
    x86_cpu_apic_id_from_index(), so the NUMA table can be based on the
    APIC IDs, instead of CPU index (SeaBIOS knows nothing about CPU
    indexes, just APIC IDs).
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/pc.c b/hw/pc.c
index de53aa4..34b6dff 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -551,6 +551,18 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
     return index;
 }
 
+/* Calculates the limit to CPU APIC ID values
+ *
+ * This function returns the limit for the APIC ID value, so that all
+ * CPU APIC IDs are < pc_apic_id_limit().
+ *
+ * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
+ */
+static unsigned int pc_apic_id_limit(unsigned int max_cpus)
+{
+    return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
+}
+
 static void *bochs_bios_init(void)
 {
     void *fw_cfg;
@@ -558,9 +570,24 @@ static void *bochs_bios_init(void)
     size_t smbios_len;
     uint64_t *numa_fw_cfg;
     int i, j;
+    unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
-    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
+    /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
+     *
+     * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
+     * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
+     * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
+     * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
+     * may see".
+     *
+     * So, this means we must not use max_cpus, here, but the maximum possible
+     * APIC ID value, plus one.
+     *
+     * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
+     *     the APIC ID, not the "CPU index"
+     */
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
@@ -579,21 +606,24 @@ static void *bochs_bios_init(void)
      * of nodes, one word for each VCPU->node and one word for each node to
      * hold the amount of memory.
      */
-    numa_fw_cfg = g_new0(uint64_t, 1 + max_cpus + nb_numa_nodes);
+    numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
     numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
     for (i = 0; i < max_cpus; i++) {
+        unsigned int apic_id = x86_cpu_apic_id_from_index(i);
+        assert(apic_id < apic_id_limit);
         for (j = 0; j < nb_numa_nodes; j++) {
             if (test_bit(i, node_cpumask[j])) {
-                numa_fw_cfg[i + 1] = cpu_to_le64(j);
+                numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
                 break;
             }
         }
     }
     for (i = 0; i < nb_numa_nodes; i++) {
-        numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+        numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
     }
     fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
-                     (1 + max_cpus + nb_numa_nodes) * sizeof(*numa_fw_cfg));
+                     (1 + apic_id_limit + nb_numa_nodes) *
+                     sizeof(*numa_fw_cfg));
 
     return fw_cfg;
 }
commit 54fb7bf68516642c609738814f160ee2069301e8
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:05 2013 -0200

    cpus.h: Make constant smp_cores/smp_threads available on *-user
    
    The code that calculates the APIC ID will use smp_cores/smp_threads, so
    just define them as 1 on *-user to avoid #ifdefs in the code.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 81bd817..f7f6854 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -13,9 +13,16 @@ void cpu_synchronize_all_post_init(void);
 
 void qtest_clock_warp(int64_t dest);
 
+#ifndef CONFIG_USER_ONLY
 /* vl.c */
 extern int smp_cores;
 extern int smp_threads;
+#else
+/* *-user doesn't have configurable SMP topology */
+#define smp_cores   1
+#define smp_threads 1
+#endif
+
 void set_numa_modes(void);
 void set_cpu_log(const char *optarg);
 void set_cpu_log_filename(const char *optarg);
commit 70db922278f7b42375ead340b793ff3938835242
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:03 2013 -0200

    fw_cfg: Remove FW_CFG_MAX_CPUS from fw_cfg_init()
    
    PC will not use max_cpus for that field, so move it outside the common
    code so it can use a different value on PC.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index bdcd836..02618f2 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -504,7 +504,6 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
     fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
     fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC));
     fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
-    fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
     fw_cfg_reboot(s);
diff --git a/hw/pc.c b/hw/pc.c
index 780b1e4..de53aa4 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -560,7 +560,7 @@ static void *bochs_bios_init(void)
     int i, j;
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
-
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 6de810b..065ea87 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -413,6 +413,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     /* No PCI init: the BIOS will do it */
 
     fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 9ed303a..2778e45 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -299,6 +299,7 @@ static void ppc_heathrow_init(QEMUMachineInitArgs *args)
     /* No PCI init: the BIOS will do it */
 
     fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW);
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 035a011..9903f44 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1021,6 +1021,7 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
                  hwdef->ecc_version);
 
     fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
@@ -1665,6 +1666,7 @@ static void sun4d_hw_init(const struct sun4d_hwdef *hwdef, ram_addr_t RAM_size,
                "Sun4d");
 
     fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
@@ -1865,6 +1867,7 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size,
                "Sun4c");
 
     fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
diff --git a/hw/sun4u.c b/hw/sun4u.c
index b891b84..9fbda29 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -878,6 +878,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
                            (uint8_t *)&nd_table[0].macaddr);
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
     fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
commit cb41bad3c2c7d82405cbe057c944ed4fd176d82a
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:04 2013 -0200

    target-i386: Introduce x86_cpu_apic_id_from_index() function
    
    This function will be used by both the CPU initialization code and the
    fw_cfg table initialization code.
    
    Later this function will be updated to generate APIC IDs according to
    the CPU topology.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index db9086e..75dc973 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2194,6 +2194,21 @@ void x86_cpu_realize(Object *obj, Error **errp)
     cpu_reset(CPU(cpu));
 }
 
+/* Calculates initial APIC ID for a specific CPU index
+ *
+ * Currently we need to be able to calculate the APIC ID from the CPU index
+ * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
+ * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
+ * all CPUs up to max_cpus.
+ */
+uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
+{
+    /* right now APIC ID == CPU index. this will eventually change to use
+     * the CPU topology configuration properly
+     */
+    return cpu_index;
+}
+
 static void x86_cpu_initfn(Object *obj)
 {
     CPUState *cs = CPU(obj);
@@ -2228,7 +2243,7 @@ static void x86_cpu_initfn(Object *obj)
                         x86_cpuid_get_tsc_freq,
                         x86_cpuid_set_tsc_freq, NULL, NULL, NULL);
 
-    env->cpuid_apic_id = cs->cpu_index;
+    env->cpuid_apic_id = x86_cpu_apic_id_from_index(cs->cpu_index);
 
     /* init various static tables used in TCG mode */
     if (tcg_enabled() && !inited) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 9d4fcf9..9442f08 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1255,4 +1255,6 @@ void disable_kvm_pv_eoi(void);
 /* Return name of 32-bit register, from a R_* constant */
 const char *get_register_name_32(unsigned int reg);
 
+uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index);
+
 #endif /* CPU_I386_H */
commit 83b17af5e619abdf11721826b08fa4f30e9dc4ee
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:02 2013 -0200

    target-i386: kvm: Set vcpu_id to APIC ID instead of CPU index
    
    The CPU ID in KVM is supposed to be the APIC ID, so change the
    KVM_CREATE_VCPU call to match it. The current behavior didn't break
    anything yet because today the APIC ID is assumed to be equal to the CPU
    index, but this won't be true in the future.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Reviewed-by: Marcelo Tosatti <mtosatti at redhat.com>
    Acked-by: Gleb Natapov <gleb at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 5f3f789..c440809 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -411,9 +411,10 @@ static void cpu_update_state(void *opaque, int running, RunState state)
     }
 }
 
-unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+unsigned long kvm_arch_vcpu_id(CPUState *cs)
 {
-    return cpu->cpu_index;
+    X86CPU *cpu = X86_CPU(cs);
+    return cpu->env.cpuid_apic_id;
 }
 
 int kvm_arch_init_vcpu(CPUState *cs)
commit b164e48ed1600055bc190aa3ab42c18004d2c711
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Tue Jan 22 18:25:01 2013 -0200

    kvm: Create kvm_arch_vcpu_id() function
    
    This will allow each architecture to define how the VCPU ID is set on
    the KVM_CREATE_VCPU ioctl call.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Acked-by: Gleb Natapov <gleb at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 22acf91..384ee66 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -196,6 +196,9 @@ int kvm_arch_init(KVMState *s);
 
 int kvm_arch_init_vcpu(CPUState *cpu);
 
+/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
+unsigned long kvm_arch_vcpu_id(CPUState *cpu);
+
 void kvm_arch_reset_vcpu(CPUState *cpu);
 
 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
diff --git a/kvm-all.c b/kvm-all.c
index 6278d61..363a358 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -222,7 +222,7 @@ int kvm_init_vcpu(CPUState *cpu)
 
     DPRINTF("kvm_init_vcpu\n");
 
-    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index);
+    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
     if (ret < 0) {
         DPRINTF("kvm_create_vcpu failed\n");
         goto err;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3acff40..5f3f789 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -411,6 +411,11 @@ static void cpu_update_state(void *opaque, int running, RunState state)
     }
 }
 
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+    return cpu->cpu_index;
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     struct {
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 2f4f068..2c64c63 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -384,6 +384,11 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 
 #endif /* !defined (TARGET_PPC64) */
 
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+    return cpu->cpu_index;
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index add6a58..99deddf 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -76,6 +76,11 @@ int kvm_arch_init(KVMState *s)
     return 0;
 }
 
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+    return cpu->cpu_index;
+}
+
 int kvm_arch_init_vcpu(CPUState *cpu)
 {
     int ret = 0;
commit 2969475869a6f33b8883c2fbf90252dcf617902e
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Jan 17 18:59:29 2013 -0200

    pc: Reverse pc_init_pci() compatibility logic
    
    Currently, the pc-1.4 machine init function enables PV EOI and then
    calls the pc-1.2 machine init function. The problem with this approach
    is that now we can't enable any additional compatibility code inside the
    pc-1.2 init function because it would end up enabling the compatibility
    behavior on pc-1.3 and pc-1.4 as well.
    
    This reverses the logic so that the pc-1.2 machine init function will
    disable PV EOI, and then call the pc-1.4 machine init function.
    
    This way we can change older machine-types to enable compatibility
    behavior, and the newer machine-types (pc-1.3, pc-q35-1.4 and
    pc-i440fx-1.4) would just use the default behavior.
    
    (This means that one nice side-effect of this change is that pc-q35-1.4
    will get PV EOI enabled by default, too)
    
    It would be interesting to eventually change pc_init_pci_no_kvmclock()
    and pc_init_isa() to reuse pc_init_pci_1_2() as well (so we don't need
    to duplicate compatibility code in those two functions). But this will
    probably be much easier to do after we create a PCInitArgs struct for
    the PC initialization arguments, and/or after we use global-properties
    to implement the compatibility modes present in pc_init_pci_1_2().
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    Reviewed-by: Marcelo Tosatti <mtosatti at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 0a6923d..f9cfe78 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -233,12 +233,14 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
              initrd_filename, cpu_model, 1, 1);
 }
 
-static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
+/* PC machine init function for pc-0.14 to pc-1.2 */
+static void pc_init_pci_1_2(QEMUMachineInitArgs *args)
 {
-    enable_kvm_pv_eoi();
+    disable_kvm_pv_eoi();
     pc_init_pci(args);
 }
 
+/* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */
 static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
 {
     ram_addr_t ram_size = args->ram_size;
@@ -247,6 +249,7 @@ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
     const char *kernel_cmdline = args->kernel_cmdline;
     const char *initrd_filename = args->initrd_filename;
     const char *boot_device = args->boot_device;
+    disable_kvm_pv_eoi();
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -264,6 +267,7 @@ static void pc_init_isa(QEMUMachineInitArgs *args)
     const char *boot_device = args->boot_device;
     if (cpu_model == NULL)
         cpu_model = "486";
+    disable_kvm_pv_eoi();
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -286,7 +290,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
     .name = "pc-i440fx-1.4",
     .alias = "pc",
     .desc = "Standard PC (i440FX + PIIX, 1996)",
-    .init = pc_init_pci_1_3,
+    .init = pc_init_pci,
     .max_cpus = 255,
     .is_default = 1,
     DEFAULT_MACHINE_OPTIONS,
@@ -302,7 +306,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
 static QEMUMachine pc_machine_v1_3 = {
     .name = "pc-1.3",
     .desc = "Standard PC",
-    .init = pc_init_pci_1_3,
+    .init = pc_init_pci,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_3,
@@ -342,7 +346,7 @@ static QEMUMachine pc_machine_v1_3 = {
 static QEMUMachine pc_machine_v1_2 = {
     .name = "pc-1.2",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_2,
@@ -386,7 +390,7 @@ static QEMUMachine pc_machine_v1_2 = {
 static QEMUMachine pc_machine_v1_1 = {
     .name = "pc-1.1",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_1,
@@ -422,7 +426,7 @@ static QEMUMachine pc_machine_v1_1 = {
 static QEMUMachine pc_machine_v1_0 = {
     .name = "pc-1.0",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_0,
@@ -438,7 +442,7 @@ static QEMUMachine pc_machine_v1_0 = {
 static QEMUMachine pc_machine_v0_15 = {
     .name = "pc-0.15",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_0_15,
@@ -471,7 +475,7 @@ static QEMUMachine pc_machine_v0_15 = {
 static QEMUMachine pc_machine_v0_14 = {
     .name = "pc-0.14",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_0_14, 
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index ac2ec24..db9086e 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -211,11 +211,12 @@ static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
         (1 << KVM_FEATURE_CLOCKSOURCE2) |
         (1 << KVM_FEATURE_ASYNC_PF) |
         (1 << KVM_FEATURE_STEAL_TIME) |
+        (1 << KVM_FEATURE_PV_EOI) |
         (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
 
-void enable_kvm_pv_eoi(void)
+void disable_kvm_pv_eoi(void)
 {
-    kvm_default_features |= (1UL << KVM_FEATURE_PV_EOI);
+    kvm_default_features &= ~(1UL << KVM_FEATURE_PV_EOI);
 }
 
 void host_cpuid(uint32_t function, uint32_t count,
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 4e091cd..9d4fcf9 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1250,7 +1250,7 @@ void do_smm_enter(CPUX86State *env1);
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
 
-void enable_kvm_pv_eoi(void);
+void disable_kvm_pv_eoi(void);
 
 /* Return name of 32-bit register, from a R_* constant */
 const char *get_register_name_32(unsigned int reg);
commit aa87d45855c7b255b451622a84a3e5b9b4393425
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Jan 17 18:59:28 2013 -0200

    target-i386: Don't set any KVM flag by default if KVM is disabled
    
    This is a cleanup that tries to solve two small issues:
    
     - We don't need a separate kvm_pv_eoi_features variable just to keep a
       constant calculated at compile-time, and this style would require
       adding a separate variable (that's declared twice because of the
       CONFIG_KVM ifdef) for each feature that's going to be
       enabled/disabled by machine-type compat code.
     - The pc-1.3 code is setting the kvm_pv_eoi flag on cpuid_kvm_features
       even when KVM is disabled at runtime. This small inconsistency in
       the cpuid_kvm_features field isn't a problem today because
       cpuid_kvm_features is ignored by the TCG code, but it may cause
       unexpected problems later when refactoring the CPUID handling code.
    
    This patch eliminates the kvm_pv_eoi_features variable and simply uses
    kvm_enabled() inside the enable_kvm_pv_eoi() compat function, so it
    enables kvm_pv_eoi only if KVM is enabled. I believe this makes the
    behavior of enable_kvm_pv_eoi() clearer and easier to understand.
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Acked-by: Gleb Natapov <gleb at redhat.com>
    Reviewed-by: Marcelo Tosatti <mtosatti at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 376d4c8..ac2ec24 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -206,22 +206,16 @@ typedef struct model_features_t {
 int check_cpuid = 0;
 int enforce_cpuid = 0;
 
-#if defined(CONFIG_KVM)
 static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
         (1 << KVM_FEATURE_NOP_IO_DELAY) |
         (1 << KVM_FEATURE_CLOCKSOURCE2) |
         (1 << KVM_FEATURE_ASYNC_PF) |
         (1 << KVM_FEATURE_STEAL_TIME) |
         (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
-static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI);
-#else
-static uint32_t kvm_default_features = 0;
-static const uint32_t kvm_pv_eoi_features = 0;
-#endif
 
 void enable_kvm_pv_eoi(void)
 {
-    kvm_default_features |= kvm_pv_eoi_features;
+    kvm_default_features |= (1UL << KVM_FEATURE_PV_EOI);
 }
 
 void host_cpuid(uint32_t function, uint32_t count,
@@ -1602,7 +1596,9 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
         goto out;
     }
 
-    def->kvm_features |= kvm_default_features;
+    if (kvm_enabled()) {
+        def->kvm_features |= kvm_default_features;
+    }
     def->ext_features |= CPUID_EXT_HYPERVISOR;
 
     if (cpu_x86_parse_featurestr(def, features) < 0) {
commit d61a23ba77deefd88fd2457c2dba7d5bf13f5f5b
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Thu Jan 17 18:59:27 2013 -0200

    kvm: Add fake KVM_FEATURE_CLOCKSOURCE_STABLE_BIT for builds without KVM
    
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Acked-by: Marcelo Tosatti <mtosatti at redhat.com>
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6bdd513..22acf91 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -36,6 +36,7 @@
 #define KVM_FEATURE_ASYNC_PF     0
 #define KVM_FEATURE_STEAL_TIME   0
 #define KVM_FEATURE_PV_EOI       0
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0
 #endif
 
 extern int kvm_allowed;
commit dd51dc5262b718c5f045c86ce1175842ab42d2cd
Author: Andreas Färber <afaerber at suse.de>
Date:   Thu Jan 17 17:30:08 2013 +0100

    target-openrisc: Clean up triple QOM casts
    
    Instead of calling openrisc_env_get_cpu(), casting to CPU() via the
    ENV_GET_CPU() compatibility macro and casting back to OPENRISC_CPU(),
    just call openrisc_env_get_cpu() directly.
    
    ENV_GET_CPU() is meant as workaround for target-independent code only.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-openrisc/exception_helper.c b/target-openrisc/exception_helper.c
index dab4148..0c53b77 100644
--- a/target-openrisc/exception_helper.c
+++ b/target-openrisc/exception_helper.c
@@ -23,7 +23,7 @@
 
 void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp)
 {
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     raise_exception(cpu, excp);
 }
diff --git a/target-openrisc/fpu_helper.c b/target-openrisc/fpu_helper.c
index b184d5e..4615a36 100644
--- a/target-openrisc/fpu_helper.c
+++ b/target-openrisc/fpu_helper.c
@@ -68,7 +68,7 @@ static inline void update_fpcsr(OpenRISCCPU *cpu)
 uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val)
 {
     uint64_t itofd;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     set_float_exception_flags(0, &cpu->env.fp_status);
     itofd = int32_to_float64(val, &cpu->env.fp_status);
@@ -80,7 +80,7 @@ uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val)
 uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val)
 {
     uint32_t itofs;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     set_float_exception_flags(0, &cpu->env.fp_status);
     itofs = int32_to_float32(val, &cpu->env.fp_status);
@@ -92,7 +92,7 @@ uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val)
 uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val)
 {
     uint64_t ftoid;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     set_float_exception_flags(0, &cpu->env.fp_status);
     ftoid = float32_to_int64(val, &cpu->env.fp_status);
@@ -104,7 +104,7 @@ uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val)
 uint32_t HELPER(ftois)(CPUOpenRISCState *env, uint32_t val)
 {
     uint32_t ftois;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     set_float_exception_flags(0, &cpu->env.fp_status);
     ftois = float32_to_int32(val, &cpu->env.fp_status);
@@ -120,7 +120,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env,               \
                                      uint64_t fdt0, uint64_t fdt1)        \
 {                                                                         \
     uint64_t result;                                                      \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     result = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status);           \
     update_fpcsr(cpu);                                                    \
@@ -131,7 +131,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env,               \
                                      uint32_t fdt0, uint32_t fdt1)        \
 {                                                                         \
     uint32_t result;                                                      \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     result = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status);           \
     update_fpcsr(cpu);                                                    \
@@ -152,7 +152,7 @@ uint64_t helper_float_ ## name1 ## name2 ## _d(CPUOpenRISCState *env,     \
 {                                                                         \
     uint64_t result, temp, hi, lo;                                        \
     uint32_t val1, val2;                                                  \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     hi = env->fpmaddhi;                                                   \
     lo = env->fpmaddlo;                                                   \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
@@ -174,7 +174,7 @@ uint32_t helper_float_ ## name1 ## name2 ## _s(CPUOpenRISCState *env,     \
 {                                                                         \
     uint64_t result, temp, hi, lo;                                        \
     uint32_t val1, val2;                                                  \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     hi = cpu->env.fpmaddhi;                                               \
     lo = cpu->env.fpmaddlo;                                               \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
@@ -198,7 +198,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env,               \
                                      uint64_t fdt0, uint64_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status);              \
     update_fpcsr(cpu);                                                    \
@@ -209,7 +209,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env,               \
                                              uint32_t fdt0, uint32_t fdt1)\
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status);              \
     update_fpcsr(cpu);                                                    \
@@ -227,7 +227,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env,               \
                                      uint64_t fdt0, uint64_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float64_eq_quiet(fdt0, fdt1, &cpu->env.fp_status);             \
     update_fpcsr(cpu);                                                    \
@@ -238,7 +238,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env,               \
                                      uint32_t fdt0, uint32_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float32_eq_quiet(fdt0, fdt1, &cpu->env.fp_status);             \
     update_fpcsr(cpu);                                                    \
@@ -253,7 +253,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env,               \
                                      uint64_t fdt0, uint64_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float64_le(fdt0, fdt1, &cpu->env.fp_status);                   \
     update_fpcsr(cpu);                                                    \
@@ -264,7 +264,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env,               \
                                      uint32_t fdt0, uint32_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float32_le(fdt0, fdt1, &cpu->env.fp_status);                   \
     update_fpcsr(cpu);                                                    \
@@ -278,7 +278,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env,               \
                                      uint64_t fdt0, uint64_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float64_lt(fdt0, fdt1, &cpu->env.fp_status);                   \
     update_fpcsr(cpu);                                                    \
@@ -289,7 +289,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env,               \
                                      uint32_t fdt0, uint32_t fdt1)        \
 {                                                                         \
     int res;                                                              \
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));                    \
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
     set_float_exception_flags(0, &cpu->env.fp_status);                    \
     res = !float32_lt(fdt0, fdt1, &cpu->env.fp_status);                   \
     update_fpcsr(cpu);                                                    \
diff --git a/target-openrisc/int_helper.c b/target-openrisc/int_helper.c
index 20f9837..16cb5ab 100644
--- a/target-openrisc/int_helper.c
+++ b/target-openrisc/int_helper.c
@@ -48,7 +48,7 @@ uint32_t HELPER(mul32)(CPUOpenRISCState *env,
     uint64_t result;
     uint32_t high, cy;
 
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     result = (uint64_t)ra * rb;
     /* regisiers in or32 is 32bit, so 32 is NOT a magic number.
diff --git a/target-openrisc/interrupt_helper.c b/target-openrisc/interrupt_helper.c
index 79f5afe..a176441 100644
--- a/target-openrisc/interrupt_helper.c
+++ b/target-openrisc/interrupt_helper.c
@@ -23,7 +23,7 @@
 
 void HELPER(rfe)(CPUOpenRISCState *env)
 {
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 #ifndef CONFIG_USER_ONLY
     int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^
                          (cpu->env.esr & (SR_SM | SR_IME | SR_DME));
diff --git a/target-openrisc/mmu.c b/target-openrisc/mmu.c
index 8364652..d354e1f 100644
--- a/target-openrisc/mmu.c
+++ b/target-openrisc/mmu.c
@@ -187,7 +187,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env,
     int ret = 0;
     hwaddr physical = 0;
     int prot = 0;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     ret = cpu_openrisc_get_phys_addr(cpu, &physical, &prot,
                                      address, rw);
@@ -209,7 +209,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env,
                                   target_ulong address, int rw, int mmu_idx)
 {
     int ret = 0;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret);
     ret = 1;
@@ -224,7 +224,7 @@ hwaddr cpu_get_phys_page_debug(CPUOpenRISCState *env,
 {
     hwaddr phys_addr;
     int prot;
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     if (cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr, 0)) {
         return -1;
diff --git a/target-openrisc/sys_helper.c b/target-openrisc/sys_helper.c
index f160dc3..3c5f45a 100644
--- a/target-openrisc/sys_helper.c
+++ b/target-openrisc/sys_helper.c
@@ -30,7 +30,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
     int spr = (ra | offset);
     int idx;
 
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     switch (spr) {
     case TO_SPR(0, 0): /* VR */
@@ -177,7 +177,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
     int spr = (ra | offset);
     int idx;
 
-    OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
 
     switch (spr) {
     case TO_SPR(0, 0): /* VR */
commit 8486af93771302fa3154857a7c05612f0f61cc90
Author: Andreas Färber <afaerber at suse.de>
Date:   Sat Jan 5 14:14:27 2013 +0100

    target-openrisc: Drop OpenRISCCPUList
    
    It was missed in 92a3136174f60ee45b113296cb2c2a5225b00369 (cpu:
    Introduce CPUListState struct) because its naming did not match the
    *CPUListState pattern. Use the generalized CPUListState instead.
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>

diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index 56544d8..7a55112 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -170,11 +170,6 @@ OpenRISCCPU *cpu_openrisc_init(const char *cpu_model)
     return cpu;
 }
 
-typedef struct OpenRISCCPUList {
-    fprintf_function cpu_fprintf;
-    FILE *file;
-} OpenRISCCPUList;
-
 /* Sort alphabetically by type name, except for "any". */
 static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b)
 {
@@ -196,7 +191,7 @@ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b)
 static void openrisc_cpu_list_entry(gpointer data, gpointer user_data)
 {
     ObjectClass *oc = data;
-    OpenRISCCPUList *s = user_data;
+    CPUListState *s = user_data;
 
     (*s->cpu_fprintf)(s->file, "  %s\n",
                       object_class_get_name(oc));
@@ -204,7 +199,7 @@ static void openrisc_cpu_list_entry(gpointer data, gpointer user_data)
 
 void cpu_openrisc_list(FILE *f, fprintf_function cpu_fprintf)
 {
-    OpenRISCCPUList s = {
+    CPUListState s = {
         .file = f,
         .cpu_fprintf = cpu_fprintf,
     };
commit 67bec53d9f2ccd3aa7d37a7e0689122587929220
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Thu Jan 24 12:50:28 2013 +0100

    ide: Add fall through annotations
    
    Add comments to help static analysers detect that these cases are
    intentional, and clean up some whitespace in the environment of these
    comments.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 14ad079..3743dc3 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1149,8 +1149,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         }
         ide_set_irq(s->bus);
         break;
+
     case WIN_VERIFY_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_VERIFY:
     case WIN_VERIFY_ONCE:
         /* do sector number check ? */
@@ -1158,8 +1160,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
         break;
+
     case WIN_READ_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READ:
     case WIN_READ_ONCE:
         if (s->drive_kind == IDE_CD) {
@@ -1173,8 +1177,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->req_nb_sectors = 1;
         ide_sector_read(s);
         break;
+
     case WIN_WRITE_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_WRITE:
     case WIN_WRITE_ONCE:
     case CFA_WRITE_SECT_WO_ERASE:
@@ -1189,8 +1195,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_transfer_start(s, s->io_buffer, 512, ide_sector_write);
         s->media_changed = 1;
         break;
+
     case WIN_MULTREAD_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_MULTREAD:
         if (!s->bs) {
             goto abort_cmd;
@@ -1202,8 +1210,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         s->req_nb_sectors = s->mult_sectors;
         ide_sector_read(s);
         break;
+
     case WIN_MULTWRITE_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_MULTWRITE:
     case CFA_WRITE_MULTI_WO_ERASE:
         if (!s->bs) {
@@ -1222,8 +1232,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write);
         s->media_changed = 1;
         break;
+
     case WIN_READDMA_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READDMA:
     case WIN_READDMA_ONCE:
         if (!s->bs) {
@@ -1232,8 +1244,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
 	ide_cmd_lba48_transform(s, lba48);
         ide_sector_start_dma(s, IDE_DMA_READ);
         break;
+
     case WIN_WRITEDMA_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_WRITEDMA:
     case WIN_WRITEDMA_ONCE:
         if (!s->bs) {
@@ -1243,14 +1257,17 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
         ide_sector_start_dma(s, IDE_DMA_WRITE);
         s->media_changed = 1;
         break;
+
     case WIN_READ_NATIVE_MAX_EXT:
-	lba48 = 1;
+        lba48 = 1;
+        /* fall through */
     case WIN_READ_NATIVE_MAX:
 	ide_cmd_lba48_transform(s, lba48);
         ide_set_sector(s, s->nb_sectors - 1);
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
         break;
+
     case WIN_CHECKPOWERMODE1:
     case WIN_CHECKPOWERMODE2:
         s->error = 0;
commit 8689907266b649b757c2203d9652cbe928a3ae0b
Author: Vishvananda Ishaya <vishvananda at gmail.com>
Date:   Thu Jan 24 10:00:40 2013 -0800

    block: Create proper size file for disk mirror
    
    The qmp monitor command to mirror a disk was passing -1 for size
    along with the disk's backing file. This size of the resulting disk
    is the size of the backing file, which is incorrect if the disk
    has been resized. Therefore we should always pass in the size of
    the current disk.
    
    Signed-off-by: Vishvananda Ishaya <vishvananda at gmail.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index ac396f3..fdc573f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1282,11 +1282,11 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    bdrv_get_geometry(bs, &size);
+    size *= 512;
     if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
         /* create new image w/o backing file */
         assert(format && drv);
-        bdrv_get_geometry(bs, &size);
-        size *= 512;
         bdrv_img_create(target, format,
                         NULL, NULL, NULL, size, flags, &local_err);
     } else {
@@ -1299,7 +1299,7 @@ void qmp_drive_mirror(const char *device, const char *target,
             bdrv_img_create(target, format,
                             source->filename,
                             source->drv->format_name,
-                            NULL, -1, flags, &local_err);
+                            NULL, size, flags, &local_err);
             break;
         default:
             abort();
commit a26230218d7d66ec5cb1aec101ceaf0e7400ef7f
Author: Jason Baron <jbaron at redhat.com>
Date:   Fri Jan 4 14:44:42 2013 -0500

    ahci: Add migration support
    
    Jason tested these patches by migrating Windows 7 and Fedora 17 guests
    (while under I/O) on both piix with ahci attached and on q35 (which has
    a built-in AHCI controller).
    
    Signed-off-by: Andreas Färber <afaerber at suse.de>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index f91cff2..ad0094f 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1199,6 +1199,82 @@ void ahci_reset(AHCIState *s)
     }
 }
 
+static const VMStateDescription vmstate_ahci_device = {
+    .name = "ahci port",
+    .version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_IDE_BUS(port, AHCIDevice),
+        VMSTATE_UINT32(port_state, AHCIDevice),
+        VMSTATE_UINT32(finished, AHCIDevice),
+        VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice),
+        VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice),
+        VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice),
+        VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice),
+        VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice),
+        VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice),
+        VMSTATE_UINT32(port_regs.cmd, AHCIDevice),
+        VMSTATE_UINT32(port_regs.tfdata, AHCIDevice),
+        VMSTATE_UINT32(port_regs.sig, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_err, AHCIDevice),
+        VMSTATE_UINT32(port_regs.scr_act, AHCIDevice),
+        VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice),
+        VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
+        VMSTATE_INT32(busy_slot, AHCIDevice),
+        VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static int ahci_state_post_load(void *opaque, int version_id)
+{
+    int i;
+    struct AHCIDevice *ad;
+    AHCIState *s = opaque;
+
+    for (i = 0; i < s->ports; i++) {
+        ad = &s->dev[i];
+        AHCIPortRegs *pr = &ad->port_regs;
+
+        map_page(&ad->lst,
+                 ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
+        map_page(&ad->res_fis,
+                 ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
+        /*
+         * All pending i/o should be flushed out on a migrate. However,
+         * we might not have cleared the busy_slot since this is done
+         * in a bh. Also, issue i/o against any slots that are pending.
+         */
+        if ((ad->busy_slot != -1) &&
+            !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
+            pr->cmd_issue &= ~(1 << ad->busy_slot);
+            ad->busy_slot = -1;
+        }
+        check_cmd(s, i);
+    }
+
+    return 0;
+}
+
+const VMStateDescription vmstate_ahci = {
+    .name = "ahci",
+    .version_id = 1,
+    .post_load = ahci_state_post_load,
+    .fields = (VMStateField []) {
+        VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports,
+                                     vmstate_ahci_device, AHCIDevice),
+        VMSTATE_UINT32(control_regs.cap, AHCIState),
+        VMSTATE_UINT32(control_regs.ghc, AHCIState),
+        VMSTATE_UINT32(control_regs.irqstatus, AHCIState),
+        VMSTATE_UINT32(control_regs.impl, AHCIState),
+        VMSTATE_UINT32(control_regs.version, AHCIState),
+        VMSTATE_UINT32(idp_index, AHCIState),
+        VMSTATE_INT32(ports, AHCIState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 typedef struct SysbusAHCIState {
     SysBusDevice busdev;
     AHCIState ahci;
@@ -1207,7 +1283,11 @@ typedef struct SysbusAHCIState {
 
 static const VMStateDescription vmstate_sysbus_ahci = {
     .name = "sysbus-ahci",
-    .unmigratable = 1,
+    .unmigratable = 1, /* Still buggy under I/O load */
+    .fields = (VMStateField []) {
+        VMSTATE_AHCI(ahci, AHCIPCIState),
+        VMSTATE_END_OF_LIST()
+    },
 };
 
 static void sysbus_ahci_reset(DeviceState *dev)
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 70d3b57..85f37fe 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -305,6 +305,16 @@ typedef struct AHCIPCIState {
     AHCIState ahci;
 } AHCIPCIState;
 
+extern const VMStateDescription vmstate_ahci;
+
+#define VMSTATE_AHCI(_field, _state) {                               \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(AHCIState),                                 \
+    .vmsd       = &vmstate_ahci,                                     \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, AHCIState),   \
+}
+
 typedef struct NCQFrame {
     uint8_t fis_type;
     uint8_t c;
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 1fb803d..cc30adc 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -79,9 +79,15 @@
 #define ICH9_IDP_INDEX          0x10
 #define ICH9_IDP_INDEX_LOG2     0x04
 
-static const VMStateDescription vmstate_ahci = {
-    .name = "ahci",
-    .unmigratable = 1,
+static const VMStateDescription vmstate_ich9_ahci = {
+    .name = "ich9_ahci",
+    .unmigratable = 1, /* Still buggy under I/O load */
+    .version_id = 1,
+    .fields = (VMStateField []) {
+        VMSTATE_PCI_DEVICE(card, AHCIPCIState),
+        VMSTATE_AHCI(ahci, AHCIPCIState),
+        VMSTATE_END_OF_LIST()
+    },
 };
 
 static void pci_ich9_reset(DeviceState *dev)
@@ -152,7 +158,7 @@ static void ich_ahci_class_init(ObjectClass *klass, void *data)
     k->device_id = PCI_DEVICE_ID_INTEL_82801IR;
     k->revision = 0x02;
     k->class_id = PCI_CLASS_STORAGE_SATA;
-    dc->vmsd = &vmstate_ahci;
+    dc->vmsd = &vmstate_ich9_ahci;
     dc->reset = pci_ich9_reset;
 }
 
commit 4ac557c89b04d506c876a0a378e815d822261c8a
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Tue Jan 15 16:12:09 2013 +0100

    ahci: Change data types in preparation for migration
    
    The size of an int depends on the host, so in order to be able to
    migrate these fields, make them either int32_t or bool, depending on the
    use.
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 2d185cb..f91cff2 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -241,7 +241,7 @@ static void  ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
             if ((pr->cmd & PORT_CMD_FIS_ON) &&
                 !s->dev[port].init_d2h_sent) {
                 ahci_init_d2h(&s->dev[port]);
-                s->dev[port].init_d2h_sent = 1;
+                s->dev[port].init_d2h_sent = true;
             }
 
             check_cmd(s, port);
@@ -494,7 +494,7 @@ static void ahci_reset_port(AHCIState *s, int port)
     pr->scr_err = 0;
     pr->scr_act = 0;
     d->busy_slot = -1;
-    d->init_d2h_sent = 0;
+    d->init_d2h_sent = false;
 
     ide_state = &s->dev[port].port.ifs[0];
     if (!ide_state->bs) {
@@ -946,7 +946,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
             ide_state->hcyl = 0xeb;
             debug_print_fis(ide_state->io_buffer, 0x10);
             ide_state->feature = IDE_FEATURE_DMA;
-            s->dev[port].done_atapi_packet = 0;
+            s->dev[port].done_atapi_packet = false;
             /* XXX send PIO setup FIS */
         }
 
@@ -991,7 +991,7 @@ static int ahci_start_transfer(IDEDMA *dma)
 
     if (is_atapi && !ad->done_atapi_packet) {
         /* already prepopulated iobuffer */
-        ad->done_atapi_packet = 1;
+        ad->done_atapi_packet = true;
         goto out;
     }
 
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 735b379..70d3b57 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -281,9 +281,9 @@ struct AHCIDevice {
     QEMUBH *check_bh;
     uint8_t *lst;
     uint8_t *res_fis;
-    int done_atapi_packet;
-    int busy_slot;
-    int init_d2h_sent;
+    bool done_atapi_packet;
+    int32_t busy_slot;
+    bool init_d2h_sent;
     AHCICmdHdr *cur_cmd;
     NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
 };
@@ -295,7 +295,7 @@ typedef struct AHCIState {
     MemoryRegion idp;       /* Index-Data Pair I/O port space */
     unsigned idp_offset;    /* Offset of index in I/O port space */
     uint32_t idp_index;     /* Current IDP index */
-    int ports;
+    int32_t ports;
     qemu_irq irq;
     DMAContext *dma;
 } AHCIState;
commit 1147bb15a715a907a91195c2ed601fc926e43d46
Author: Jason Baron <jbaron at redhat.com>
Date:   Fri Jan 4 14:44:41 2013 -0500

    ahci: Remove unused AHCIDevice fields
    
    'dma_status' and 'dma_cb' are written to, but never read.
    Remove these fields in preparation for AHCI migration bits.
    
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Reviewed-by: Juan Quintela <quintela at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 21f50ea..2d185cb 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1035,11 +1035,10 @@ out:
 static void ahci_start_dma(IDEDMA *dma, IDEState *s,
                            BlockDriverCompletionFunc *dma_cb)
 {
+#ifdef DEBUG_AHCI
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
-
+#endif
     DPRINTF(ad->port_no, "\n");
-    ad->dma_cb = dma_cb;
-    ad->dma_status |= BM_STATUS_DMAING;
     s->io_buffer_offset = 0;
     dma_cb(s, 0);
 }
@@ -1095,7 +1094,6 @@ static int ahci_dma_set_unit(IDEDMA *dma, int unit)
 static int ahci_dma_add_status(IDEDMA *dma, int status)
 {
     AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
-    ad->dma_status |= status;
     DPRINTF(ad->port_no, "set status: %x\n", status);
 
     if (status & BM_STATUS_INT) {
@@ -1114,8 +1112,6 @@ static int ahci_dma_set_inactive(IDEDMA *dma)
     /* update d2h status */
     ahci_write_fis_d2h(ad, NULL);
 
-    ad->dma_cb = NULL;
-
     if (!ad->check_bh) {
         /* maybe we still have something to process, check later */
         ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 1200a56..735b379 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -281,11 +281,9 @@ struct AHCIDevice {
     QEMUBH *check_bh;
     uint8_t *lst;
     uint8_t *res_fis;
-    int dma_status;
     int done_atapi_packet;
     int busy_slot;
     int init_d2h_sent;
-    BlockDriverCompletionFunc *dma_cb;
     AHCICmdHdr *cur_cmd;
     NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
 };
commit 1b0952445522af73b0e78420a9078b3653923703
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 15:01:12 2013 +0100

    hbitmap: add assertion on hbitmap_iter_init
    
    hbitmap_iter_init causes an out-of-bounds access when the "first"
    argument is greater than or equal to the size of the bitmap.
    Forbid this with an assertion, and remove the failing testcase.
    
    Reported-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 7ddfb66..73f5d1d 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -128,7 +128,8 @@ void hbitmap_free(HBitmap *hb);
  * hbitmap_iter_init:
  * @hbi: HBitmapIter to initialize.
  * @hb: HBitmap to iterate on.
- * @first: First bit to visit (0-based).
+ * @first: First bit to visit (0-based, must be strictly less than the
+ * size of the bitmap).
  *
  * Set up @hbi to iterate on the HBitmap @hb.  hbitmap_iter_next will return
  * the lowest-numbered bit that is set in @hb, starting at @first.
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index fcc6a00..8c902f2 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -86,7 +86,9 @@ static void hbitmap_test_init(TestHBitmapData *data,
     data->bits = g_new0(unsigned long, n);
     data->size = size;
     data->granularity = granularity;
-    hbitmap_test_check(data, 0);
+    if (size) {
+        hbitmap_test_check(data, 0);
+    }
 }
 
 static void hbitmap_test_teardown(TestHBitmapData *data,
@@ -198,14 +200,6 @@ static void test_hbitmap_iter_partial(TestHBitmapData *data,
     hbitmap_test_check(data, L3 / 2);
 }
 
-static void test_hbitmap_iter_past(TestHBitmapData *data,
-                                    const void *unused)
-{
-    hbitmap_test_init(data, L3, 0);
-    hbitmap_test_set(data, 0, L3);
-    hbitmap_test_check(data, L3);
-}
-
 static void test_hbitmap_set_all(TestHBitmapData *data,
                                  const void *unused)
 {
@@ -388,7 +382,6 @@ int main(int argc, char **argv)
     hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero);
     hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned);
     hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty);
-    hbitmap_test_add("/hbitmap/iter/past", test_hbitmap_iter_past);
     hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial);
     hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity);
     hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all);
diff --git a/util/hbitmap.c b/util/hbitmap.c
index fb7e01e..2aa487d 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -147,6 +147,7 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
 
     hbi->hb = hb;
     pos = first >> hb->granularity;
+    assert(pos < hb->size);
     hbi->pos = pos >> BITS_PER_LEVEL;
     hbi->granularity = hb->granularity;
 
commit 88ff0e48eedd679a9dc1122676d8aa29f8d07571
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 15:01:11 2013 +0100

    mirror: do nothing on zero-sized disk
    
    On a zero-sized disk we need to break out of the job successfully
    before bdrv_dirty_iter_init is called, otherwise you will get an
    assertion failure with the next patch.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 9347533..a62ad86 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -305,7 +305,7 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     s->common.len = bdrv_getlength(bs);
-    if (s->common.len < 0) {
+    if (s->common.len <= 0) {
         block_job_completed(&s->common, s->common.len);
         return;
     }
commit 0e87ba2ccbf900cc5a56b95e0671e5a5d2c2f6a0
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Jan 17 21:45:28 2013 +0100

    block/vdi: Check for bad signature
    
    vdi_open did not check for a bad signature.
    This check was only in vdi_probe.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/vdi.c b/block/vdi.c
index 8b768bf..257a592 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -392,7 +392,11 @@ static int vdi_open(BlockDriverState *bs, int flags)
         header.disk_size &= ~(SECTOR_SIZE - 1);
     }
 
-    if (header.version != VDI_VERSION_1_1) {
+    if (header.signature != VDI_SIGNATURE) {
+        logout("bad vdi signature %08x\n", header.signature);
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    } else if (header.version != VDI_VERSION_1_1) {
         logout("unsupported version %u.%u\n",
                header.version >> 16, header.version & 0xffff);
         ret = -ENOTSUP;
commit 8937f8222c14ab42140a5645c34e17cc620c05bb
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Jan 17 21:45:27 2013 +0100

    block/vdi: Improved return values from vdi_open
    
    vdi_open returned -1 in case of any error, but it should return an
    error code (negative value of errno or -EMEDIUMTYPE).
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/vdi.c b/block/vdi.c
index 0e1ed61..8b768bf 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -369,10 +369,12 @@ static int vdi_open(BlockDriverState *bs, int flags)
     BDRVVdiState *s = bs->opaque;
     VdiHeader header;
     size_t bmap_size;
+    int ret;
 
     logout("\n");
 
-    if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
+    ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+    if (ret < 0) {
         goto fail;
     }
 
@@ -393,30 +395,38 @@ static int vdi_open(BlockDriverState *bs, int flags)
     if (header.version != VDI_VERSION_1_1) {
         logout("unsupported version %u.%u\n",
                header.version >> 16, header.version & 0xffff);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.offset_bmap % SECTOR_SIZE != 0) {
         /* We only support block maps which start on a sector boundary. */
         logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.offset_data % SECTOR_SIZE != 0) {
         /* We only support data blocks which start on a sector boundary. */
         logout("unsupported data offset 0x%x B\n", header.offset_data);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.sector_size != SECTOR_SIZE) {
         logout("unsupported sector size %u B\n", header.sector_size);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.block_size != 1 * MiB) {
         logout("unsupported block size %u B\n", header.block_size);
+        ret = -ENOTSUP;
         goto fail;
     } else if (header.disk_size >
                (uint64_t)header.blocks_in_image * header.block_size) {
         logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
+        ret = -ENOTSUP;
         goto fail;
     } else if (!uuid_is_null(header.uuid_link)) {
         logout("link uuid != 0, unsupported\n");
+        ret = -ENOTSUP;
         goto fail;
     } else if (!uuid_is_null(header.uuid_parent)) {
         logout("parent uuid != 0, unsupported\n");
+        ret = -ENOTSUP;
         goto fail;
     }
 
@@ -432,7 +442,8 @@ static int vdi_open(BlockDriverState *bs, int flags)
     if (bmap_size > 0) {
         s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
     }
-    if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) {
+    ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+    if (ret < 0) {
         goto fail_free_bmap;
     }
 
@@ -448,7 +459,7 @@ static int vdi_open(BlockDriverState *bs, int flags)
     g_free(s->bmap);
 
  fail:
-    return -1;
+    return ret;
 }
 
 static int vdi_reopen_prepare(BDRVReopenState *state,
commit 9f0470bb2d7942c28977296ff2598cdf30886e07
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Jan 17 21:45:26 2013 +0100

    block/vdi: Improve debug output for signature
    
    The signature is a 32 bit value and needs up to 8 hex digits for printing.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/vdi.c b/block/vdi.c
index 021abaa..0e1ed61 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -246,7 +246,7 @@ static void vdi_header_print(VdiHeader *header)
 {
     char uuid[37];
     logout("text        %s", header->text);
-    logout("signature   0x%04x\n", header->signature);
+    logout("signature   0x%08x\n", header->signature);
     logout("header size 0x%04x\n", header->header_size);
     logout("image type  0x%04x\n", header->image_type);
     logout("image flags 0x%04x\n", header->image_flags);
commit 15bac0d54f78adb5e255155a69e56ab7f6d8c8ea
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Jan 17 21:45:25 2013 +0100

    block: Use error code EMEDIUMTYPE for wrong format in some block drivers
    
    This improves error reports for bochs, cow, qcow, qcow2, qed and vmdk
    when a file with the wrong format is selected.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/bochs.c b/block/bochs.c
index 1b1d9cd..3737583 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -126,7 +126,7 @@ static int bochs_open(BlockDriverState *bs, int flags)
         strcmp(bochs.subtype, GROWING_TYPE) ||
 	((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
 	(le32_to_cpu(bochs.version) != HEADER_V1))) {
-        goto fail;
+        return -EMEDIUMTYPE;
     }
 
     if (le32_to_cpu(bochs.version) == HEADER_V1) {
diff --git a/block/cow.c b/block/cow.c
index a33ce95..4baf904 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -73,7 +73,7 @@ static int cow_open(BlockDriverState *bs, int flags)
     }
 
     if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
         goto fail;
     }
 
diff --git a/block/qcow.c b/block/qcow.c
index 4276610..a7135ee 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -112,7 +112,7 @@ static int qcow_open(BlockDriverState *bs, int flags)
     be64_to_cpus(&header.l1_table_offset);
 
     if (header.magic != QCOW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
         goto fail;
     }
     if (header.version != QCOW_VERSION) {
diff --git a/block/qcow2.c b/block/qcow2.c
index f6abff6..7610e56 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,7 +311,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
     be32_to_cpus(&header.nb_snapshots);
 
     if (header.magic != QCOW_MAGIC) {
-        ret = -EINVAL;
+        ret = -EMEDIUMTYPE;
         goto fail;
     }
     if (header.version < 2 || header.version > 3) {
diff --git a/block/qed.c b/block/qed.c
index cf85d8f..b8515e5 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -390,7 +390,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
     qed_header_le_to_cpu(&le_header, &s->header);
 
     if (s->header.magic != QED_MAGIC) {
-        return -EINVAL;
+        return -EMEDIUMTYPE;
     }
     if (s->header.features & ~QED_FEATURE_MASK) {
         /* image uses unsupported feature bits */
diff --git a/block/vmdk.c b/block/vmdk.c
index 19298c2..8333afb 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -616,7 +616,7 @@ static int vmdk_open_sparse(BlockDriverState *bs,
             return vmdk_open_vmdk4(bs, file, flags);
             break;
         default:
-            return -EINVAL;
+            return -EMEDIUMTYPE;
             break;
     }
 }
@@ -718,7 +718,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
     }
     buf[2047] = '\0';
     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        return -EINVAL;
+        return -EMEDIUMTYPE;
     }
     if (strcmp(ct, "monolithicFlat") &&
         strcmp(ct, "twoGbMaxExtentSparse") &&
commit 02582abd48aa3d860015e9a8fcd0d7ec1c34ec62
Author: Stefan Weil <sw at weilnetz.de>
Date:   Thu Jan 17 21:45:24 2013 +0100

    block: Add special error code for wrong format
    
    The block drivers need a special error code for "wrong format".
    From the available error codes EMEDIUMTYPE fits best.
    It is not available on all platforms, so a definition in
    qemu-common.h and a specific error report are needed.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index ad25b9b..ac396f3 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -617,8 +617,13 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
 
     ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
     if (ret < 0) {
-        error_report("could not open disk image %s: %s",
-                     file, strerror(-ret));
+        if (ret == -EMEDIUMTYPE) {
+            error_report("could not open disk image %s: not in %s format",
+                         file, drv->format_name);
+        } else {
+            error_report("could not open disk image %s: %s",
+                         file, strerror(-ret));
+        }
         goto err;
     }
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index ca464bb..af2379f 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -68,6 +68,9 @@
 #if !defined(ECANCELED)
 #define ECANCELED 4097
 #endif
+#if !defined(EMEDIUMTYPE)
+#define EMEDIUMTYPE 4098
+#endif
 #ifndef TIME_MAX
 #define TIME_MAX LONG_MAX
 #endif
commit 884fea4e87fbc6daf4e6df618bb3cadc188dcc6b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 09:03:15 2013 +0100

    mirror: support arbitrarily-sized iterations
    
    Yet another optimization is to extend the mirroring iteration to include more
    adjacent dirty blocks.  This limits the number of I/O operations and makes
    mirroring efficient even with a small granularity.  Most of the infrastructure
    is already in place; we only need to put a loop around the computation of
    the origin and sector count of the iteration.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 4696dc8..9347533 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -140,7 +140,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->common.bs;
     int nb_sectors, sectors_per_chunk, nb_chunks;
-    int64_t end, sector_num, chunk_num, next_sector, hbitmap_next_sector;
+    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
     MirrorOp *op;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -152,45 +152,82 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
     }
 
     hbitmap_next_sector = s->sector_num;
+    sector_num = s->sector_num;
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    end = s->common.len >> BDRV_SECTOR_BITS;
 
-    /* If we have no backing file yet in the destination, and the cluster size
-     * is very large, we need to do COW ourselves.  The first time a cluster is
-     * copied, copy it entirely.
+    /* Extend the QEMUIOVector to include all adjacent blocks that will
+     * be copied in this operation.
+     *
+     * We have to do this if we have no backing file yet in the destination,
+     * and the cluster size is very large.  Then we need to do COW ourselves.
+     * The first time a cluster is copied, copy it entirely.  Note that,
+     * because both the granularity and the cluster size are powers of two,
+     * the number of sectors to copy cannot exceed one cluster.
      *
-     * Because both the granularity and the cluster size are powers of two, the
-     * number of sectors to copy cannot exceed one cluster.
+     * We also want to extend the QEMUIOVector to include more adjacent
+     * dirty blocks if possible, to limit the number of I/O operations and
+     * run efficiently even with a small granularity.
      */
-    sector_num = s->sector_num;
-    sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
-    chunk_num = sector_num / sectors_per_chunk;
-    if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
-        trace_mirror_cow(s, sector_num);
-        bdrv_round_to_clusters(s->target,
-                               sector_num, sectors_per_chunk,
-                               &sector_num, &nb_sectors);
-
-        /* The rounding may make us copy sectors before the
-         * first dirty one.
-         */
-        chunk_num = sector_num / sectors_per_chunk;
-    }
+    nb_chunks = 0;
+    nb_sectors = 0;
+    next_sector = sector_num;
+    next_chunk = sector_num / sectors_per_chunk;
 
     /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
-    while (test_bit(chunk_num, s->in_flight_bitmap)) {
+    while (test_bit(next_chunk, s->in_flight_bitmap)) {
         trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
         qemu_coroutine_yield();
     }
 
-    end = s->common.len >> BDRV_SECTOR_BITS;
-    nb_sectors = MIN(nb_sectors, end - sector_num);
-    nb_chunks = (nb_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
-    while (s->buf_free_count < nb_chunks) {
-        trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
-        qemu_coroutine_yield();
-    }
+    do {
+        int added_sectors, added_chunks;
+
+        if (!bdrv_get_dirty(source, next_sector) ||
+            test_bit(next_chunk, s->in_flight_bitmap)) {
+            assert(nb_sectors > 0);
+            break;
+        }
+
+        added_sectors = sectors_per_chunk;
+        if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
+            bdrv_round_to_clusters(s->target,
+                                   next_sector, added_sectors,
+                                   &next_sector, &added_sectors);
+
+            /* On the first iteration, the rounding may make us copy
+             * sectors before the first dirty one.
+             */
+            if (next_sector < sector_num) {
+                assert(nb_sectors == 0);
+                sector_num = next_sector;
+                next_chunk = next_sector / sectors_per_chunk;
+            }
+        }
+
+        added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
+        added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+
+        /* When doing COW, it may happen that there is not enough space for
+         * a full cluster.  Wait if that is the case.
+         */
+        while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
+            trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
+            qemu_coroutine_yield();
+        }
+        if (s->buf_free_count < nb_chunks + added_chunks) {
+            trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
+            break;
+        }
+
+        /* We have enough free space to copy these sectors.  */
+        bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
 
-    /* We have enough free space to copy these sectors.  */
-    bitmap_set(s->in_flight_bitmap, chunk_num, nb_chunks);
+        nb_sectors += added_sectors;
+        nb_chunks += added_chunks;
+        next_sector += added_sectors;
+        next_chunk += added_chunks;
+    } while (next_sector < end);
 
     /* Allocate a MirrorOp that is used as an AIO callback.  */
     op = g_slice_new(MirrorOp);
diff --git a/trace-events b/trace-events
index b4a23cd..2b28076 100644
--- a/trace-events
+++ b/trace-events
@@ -89,6 +89,7 @@ mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %
 mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
 mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
 mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
commit 402a47411bff5e849dc880dd08ba7e6564e6e4f4
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 09:03:14 2013 +0100

    mirror: support more than one in-flight AIO operation
    
    With AIO support in place, we can start copying more than one chunk
    in parallel.  This patch introduces the required infrastructure for
    this: the buffer is split into multiple granularity-sized chunks,
    and there is a free list to access them.
    
    Because of copy-on-write, a single operation may already require
    multiple chunks to be available on the free list.
    
    In addition, two different iterations on the HBitmap may want to
    copy the same cluster.  We avoid this by keeping a bitmap of in-flight
    I/O operations, and blocking until the previous iteration completes.
    This should be a pretty rare occurrence, though; as long as there is
    no overlap the next iteration can start before the previous one finishes.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 896972c..4696dc8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -17,7 +17,15 @@
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
 
-#define SLICE_TIME 100000000ULL /* ns */
+#define SLICE_TIME    100000000ULL /* ns */
+#define MAX_IN_FLIGHT 16
+
+/* The mirroring buffer is a list of granularity-sized chunks.
+ * Free chunks are organized in a list.
+ */
+typedef struct MirrorBuffer {
+    QSIMPLEQ_ENTRY(MirrorBuffer) next;
+} MirrorBuffer;
 
 typedef struct MirrorBlockJob {
     BlockJob common;
@@ -33,7 +41,10 @@ typedef struct MirrorBlockJob {
     unsigned long *cow_bitmap;
     HBitmapIter hbi;
     uint8_t *buf;
+    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
+    int buf_free_count;
 
+    unsigned long *in_flight_bitmap;
     int in_flight;
     int ret;
 } MirrorBlockJob;
@@ -41,7 +52,6 @@ typedef struct MirrorBlockJob {
 typedef struct MirrorOp {
     MirrorBlockJob *s;
     QEMUIOVector qiov;
-    struct iovec iov;
     int64_t sector_num;
     int nb_sectors;
 } MirrorOp;
@@ -62,15 +72,24 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
 static void mirror_iteration_done(MirrorOp *op, int ret)
 {
     MirrorBlockJob *s = op->s;
+    struct iovec *iov;
     int64_t chunk_num;
-    int nb_chunks, sectors_per_chunk;
+    int i, nb_chunks, sectors_per_chunk;
 
     trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
 
     s->in_flight--;
+    iov = op->qiov.iov;
+    for (i = 0; i < op->qiov.niov; i++) {
+        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
+        QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
+        s->buf_free_count++;
+    }
+
     sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
     chunk_num = op->sector_num / sectors_per_chunk;
     nb_chunks = op->nb_sectors / sectors_per_chunk;
+    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
     if (s->cow_bitmap && ret >= 0) {
         bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
     }
@@ -120,8 +139,8 @@ static void mirror_read_complete(void *opaque, int ret)
 static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->common.bs;
-    int nb_sectors, sectors_per_chunk;
-    int64_t end, sector_num, chunk_num;
+    int nb_sectors, sectors_per_chunk, nb_chunks;
+    int64_t end, sector_num, chunk_num, next_sector, hbitmap_next_sector;
     MirrorOp *op;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -132,6 +151,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
         assert(s->sector_num >= 0);
     }
 
+    hbitmap_next_sector = s->sector_num;
+
     /* If we have no backing file yet in the destination, and the cluster size
      * is very large, we need to do COW ourselves.  The first time a cluster is
      * copied, copy it entirely.
@@ -147,19 +168,56 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
         bdrv_round_to_clusters(s->target,
                                sector_num, sectors_per_chunk,
                                &sector_num, &nb_sectors);
+
+        /* The rounding may make us copy sectors before the
+         * first dirty one.
+         */
+        chunk_num = sector_num / sectors_per_chunk;
+    }
+
+    /* Wait for I/O to this cluster (from a previous iteration) to be done.  */
+    while (test_bit(chunk_num, s->in_flight_bitmap)) {
+        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        qemu_coroutine_yield();
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
     nb_sectors = MIN(nb_sectors, end - sector_num);
+    nb_chunks = (nb_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+    while (s->buf_free_count < nb_chunks) {
+        trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
+        qemu_coroutine_yield();
+    }
+
+    /* We have enough free space to copy these sectors.  */
+    bitmap_set(s->in_flight_bitmap, chunk_num, nb_chunks);
 
     /* Allocate a MirrorOp that is used as an AIO callback.  */
     op = g_slice_new(MirrorOp);
     op->s = s;
-    op->iov.iov_base = s->buf;
-    op->iov.iov_len  = nb_sectors * 512;
     op->sector_num = sector_num;
     op->nb_sectors = nb_sectors;
-    qemu_iovec_init_external(&op->qiov, &op->iov, 1);
+
+    /* Now make a QEMUIOVector taking enough granularity-sized chunks
+     * from s->buf_free.
+     */
+    qemu_iovec_init(&op->qiov, nb_chunks);
+    next_sector = sector_num;
+    while (nb_chunks-- > 0) {
+        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
+        s->buf_free_count--;
+        qemu_iovec_add(&op->qiov, buf, s->granularity);
+
+        /* Advance the HBitmapIter in parallel, so that we do not examine
+         * the same sector twice.
+         */
+        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+            hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+        }
+
+        next_sector += sectors_per_chunk;
+    }
 
     bdrv_reset_dirty(source, sector_num, nb_sectors);
 
@@ -170,6 +228,23 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
                    mirror_read_complete, op);
 }
 
+static void mirror_free_init(MirrorBlockJob *s)
+{
+    int granularity = s->granularity;
+    size_t buf_size = s->buf_size;
+    uint8_t *buf = s->buf;
+
+    assert(s->buf_free_count == 0);
+    QSIMPLEQ_INIT(&s->buf_free);
+    while (buf_size != 0) {
+        MirrorBuffer *cur = (MirrorBuffer *)buf;
+        QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
+        s->buf_free_count++;
+        buf_size -= granularity;
+        buf += granularity;
+    }
+}
+
 static void mirror_drain(MirrorBlockJob *s)
 {
     while (s->in_flight > 0) {
@@ -198,6 +273,9 @@ static void coroutine_fn mirror_run(void *opaque)
         return;
     }
 
+    length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
+    s->in_flight_bitmap = bitmap_new(length);
+
     /* If we have no backing file yet in the destination, we cannot let
      * the destination do COW.  Instead, we copy sectors around the
      * dirty data if needed.  We need a bitmap to do that.
@@ -208,7 +286,6 @@ static void coroutine_fn mirror_run(void *opaque)
         bdrv_get_info(s->target, &bdi);
         if (s->granularity < bdi.cluster_size) {
             s->buf_size = MAX(s->buf_size, bdi.cluster_size);
-            length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
             s->cow_bitmap = bitmap_new(length);
         }
     }
@@ -216,6 +293,7 @@ static void coroutine_fn mirror_run(void *opaque)
     end = s->common.len >> BDRV_SECTOR_BITS;
     s->buf = qemu_blockalign(bs, s->buf_size);
     sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    mirror_free_init(s);
 
     if (s->mode != MIRROR_SYNC_MODE_NONE) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -261,8 +339,9 @@ static void coroutine_fn mirror_run(void *opaque)
          */
         if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
             s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-            if (s->in_flight > 0) {
-                trace_mirror_yield(s, s->in_flight, cnt);
+            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+                (cnt == 0 && s->in_flight > 0)) {
+                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
                 qemu_coroutine_yield();
                 continue;
             } else if (cnt != 0) {
@@ -354,6 +433,7 @@ immediate_exit:
     assert(s->in_flight == 0);
     qemu_vfree(s->buf);
     g_free(s->cow_bitmap);
+    g_free(s->in_flight_bitmap);
     bdrv_set_dirty_tracking(bs, 0);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
diff --git a/trace-events b/trace-events
index 43e6b73..b4a23cd 100644
--- a/trace-events
+++ b/trace-events
@@ -86,7 +86,9 @@ mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64
 mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
 mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
 mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
-mirror_yield(void *s, int64_t cnt, int in_flight) "s %p dirty count %"PRId64" in_flight %d"
+mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
+mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
commit 08e4ed6cdeeee7912072cf14aa8ab6c60dacb4fb
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 09:03:13 2013 +0100

    mirror: add buf-size argument to drive-mirror
    
    This makes sense when the next commit starts using the extra buffer space
    to perform many I/O operations asynchronously.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index fc6b9b7..896972c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -207,7 +207,7 @@ static void coroutine_fn mirror_run(void *opaque)
     if (backing_filename[0] && !s->target->backing_hd) {
         bdrv_get_info(s->target, &bdi);
         if (s->granularity < bdi.cluster_size) {
-            s->buf_size = bdi.cluster_size;
+            s->buf_size = MAX(s->buf_size, bdi.cluster_size);
             length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
             s->cow_bitmap = bitmap_new(length);
         }
@@ -416,8 +416,8 @@ static BlockJobType mirror_job_type = {
 };
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
-                  BlockdevOnError on_source_error,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
@@ -455,7 +455,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->target = target;
     s->mode = mode;
     s->granularity = granularity;
-    s->buf_size = granularity;
+    s->buf_size = MAX(buf_size, granularity);
 
     bdrv_set_dirty_tracking(bs, granularity);
     bdrv_set_enable_write_cache(s->target, true);
diff --git a/blockdev.c b/blockdev.c
index 07fd327..ad25b9b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1188,12 +1188,15 @@ void qmp_block_commit(const char *device,
     drive_get_ref(drive_get_by_blockdev(bs));
 }
 
+#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
+
 void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
                       bool has_granularity, uint32_t granularity,
+                      bool has_buf_size, int64_t buf_size,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -1222,6 +1225,10 @@ void qmp_drive_mirror(const char *device, const char *target,
     if (!has_granularity) {
         granularity = 0;
     }
+    if (!has_buf_size) {
+        buf_size = DEFAULT_MIRROR_BUF_SIZE;
+    }
+
     if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
         error_set(errp, QERR_INVALID_PARAMETER, device);
         return;
@@ -1311,7 +1318,7 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    mirror_start(bs, target_bs, speed, granularity, sync,
+    mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
                  on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/hmp.c b/hmp.c
index 0f3347d..99fd892 100644
--- a/hmp.c
+++ b/hmp.c
@@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
 
     qmp_drive_mirror(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0,
+                     true, mode, false, 0, false, 0, false, 0,
                      false, 0, false, 0, &errp);
     hmp_handle_error(mon, &errp);
 }
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1165339..f7279b9 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -345,6 +345,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
  * @mode: Whether to collapse all images in the chain to the target.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
@@ -358,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs will be switched to read from @target.
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
-                  BlockdevOnError on_source_error,
+                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
diff --git a/qapi-schema.json b/qapi-schema.json
index fd5ec93..ba75c4d 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1641,6 +1641,9 @@
 #               are smaller than that, else the cluster size.  Must be a
 #               power of 2 between 512 and 64M (since 1.4).
 #
+# @buf-size: #optional maximum amount of data in flight from source to
+#            target (since 1.4).
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
@@ -1658,7 +1661,7 @@
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
             '*speed': 'int', '*granularity': 'uint32',
-            '*on-source-error': 'BlockdevOnError',
+            '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
 ##
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 835ea26..273b4a6 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -939,7 +939,7 @@ EQMP
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
                       "on-source-error:s?,on-target-error:s?,"
-                      "granularity:i?",
+                      "granularity:i?,buf-size:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
     },
 
@@ -964,6 +964,8 @@ Arguments:
 - "speed": maximum speed of the streaming job, in bytes per second
   (json-int)
 - "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
+- "buf-size": maximum amount of data in flight from source to target, in bytes
+  (json-int, default 10M)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
   allocated in the topmost image, or "none" to only replicate new I/O
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index a1299b3..b040820 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase):
         self.assertTrue(self.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
+    def test_small_buffer(self):
+        self.assert_no_active_mirrors()
+
+        # A small buffer is rounded up automatically
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             buf_size=4096, target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_small_buffer2(self):
+        self.assert_no_active_mirrors()
+
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d'
+                        % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img)
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             buf_size=65536, mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
     def test_large_cluster(self):
         self.assert_no_active_mirrors()
 
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 3a89159..84bfd63 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-....................
+......................
 ----------------------------------------------------------------------
-Ran 20 tests
+Ran 22 tests
 
 OK
commit bd48bde8f0fa08dfc8edcafc2bc8aa6d43734463
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Jan 22 09:03:12 2013 +0100

    mirror: switch mirror_iteration to AIO
    
    There is really no change in the behavior of the job here, since
    there is still a maximum of one in-flight I/O operation between
    the source and the target.  However, this patch already introduces
    the AIO callbacks (which are unmodified in the next patch)
    and some of the logic to count in-flight operations and only
    complete the job when there is none.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 0fecb40..fc6b9b7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -33,8 +33,19 @@ typedef struct MirrorBlockJob {
     unsigned long *cow_bitmap;
     HBitmapIter hbi;
     uint8_t *buf;
+
+    int in_flight;
+    int ret;
 } MirrorBlockJob;
 
+typedef struct MirrorOp {
+    MirrorBlockJob *s;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    int64_t sector_num;
+    int nb_sectors;
+} MirrorOp;
+
 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                             int error)
 {
@@ -48,15 +59,70 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
     }
 }
 
-static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
-                                         BlockErrorAction *p_action)
+static void mirror_iteration_done(MirrorOp *op, int ret)
+{
+    MirrorBlockJob *s = op->s;
+    int64_t chunk_num;
+    int nb_chunks, sectors_per_chunk;
+
+    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
+
+    s->in_flight--;
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    chunk_num = op->sector_num / sectors_per_chunk;
+    nb_chunks = op->nb_sectors / sectors_per_chunk;
+    if (s->cow_bitmap && ret >= 0) {
+        bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+    }
+
+    g_slice_free(MirrorOp, op);
+    qemu_coroutine_enter(s->common.co, NULL);
+}
+
+static void mirror_write_complete(void *opaque, int ret)
+{
+    MirrorOp *op = opaque;
+    MirrorBlockJob *s = op->s;
+    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
+        BlockErrorAction action;
+
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        action = mirror_error_action(s, false, -ret);
+        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+            s->ret = ret;
+        }
+    }
+    mirror_iteration_done(op, ret);
+}
+
+static void mirror_read_complete(void *opaque, int ret)
+{
+    MirrorOp *op = opaque;
+    MirrorBlockJob *s = op->s;
+    if (ret < 0) {
+        BlockDriverState *source = s->common.bs;
+        BlockErrorAction action;
+
+        bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+        action = mirror_error_action(s, true, -ret);
+        if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+            s->ret = ret;
+        }
+
+        mirror_iteration_done(op, ret);
+        return;
+    }
+    bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+                    mirror_write_complete, op);
+}
+
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->common.bs;
-    BlockDriverState *target = s->target;
-    QEMUIOVector qiov;
-    int ret, nb_sectors, sectors_per_chunk;
+    int nb_sectors, sectors_per_chunk;
     int64_t end, sector_num, chunk_num;
-    struct iovec iov;
+    MirrorOp *op;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
     if (s->sector_num < 0) {
@@ -85,35 +151,30 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
 
     end = s->common.len >> BDRV_SECTOR_BITS;
     nb_sectors = MIN(nb_sectors, end - sector_num);
+
+    /* Allocate a MirrorOp that is used as an AIO callback.  */
+    op = g_slice_new(MirrorOp);
+    op->s = s;
+    op->iov.iov_base = s->buf;
+    op->iov.iov_len  = nb_sectors * 512;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+    qemu_iovec_init_external(&op->qiov, &op->iov, 1);
+
     bdrv_reset_dirty(source, sector_num, nb_sectors);
 
     /* Copy the dirty cluster.  */
-    iov.iov_base = s->buf;
-    iov.iov_len  = nb_sectors * 512;
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
+    s->in_flight++;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
-    ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
-    if (ret < 0) {
-        *p_action = mirror_error_action(s, true, -ret);
-        goto fail;
-    }
-    ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
-    if (ret < 0) {
-        *p_action = mirror_error_action(s, false, -ret);
-        s->synced = false;
-        goto fail;
-    }
-    if (s->cow_bitmap) {
-        bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
-                   nb_sectors / sectors_per_chunk);
-    }
-    return 0;
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+}
 
-fail:
-    /* Try again later.  */
-    bdrv_set_dirty(source, sector_num, nb_sectors);
-    return ret;
+static void mirror_drain(MirrorBlockJob *s)
+{
+    while (s->in_flight > 0) {
+        qemu_coroutine_yield();
+    }
 }
 
 static void coroutine_fn mirror_run(void *opaque)
@@ -121,6 +182,7 @@ static void coroutine_fn mirror_run(void *opaque)
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
     int64_t sector_num, end, sectors_per_chunk, length;
+    uint64_t last_pause_ns;
     BlockDriverInfo bdi;
     char backing_filename[1024];
     int ret = 0;
@@ -179,23 +241,38 @@ static void coroutine_fn mirror_run(void *opaque)
     }
 
     bdrv_dirty_iter_init(bs, &s->hbi);
+    last_pause_ns = qemu_get_clock_ns(rt_clock);
     for (;;) {
         uint64_t delay_ns;
         int64_t cnt;
         bool should_complete;
 
+        if (s->ret < 0) {
+            ret = s->ret;
+            goto immediate_exit;
+        }
+
         cnt = bdrv_get_dirty_count(bs);
-        if (cnt != 0) {
-            BlockErrorAction action = BDRV_ACTION_REPORT;
-            ret = mirror_iteration(s, &action);
-            if (ret < 0 && action == BDRV_ACTION_REPORT) {
-                goto immediate_exit;
+
+        /* Note that even when no rate limit is applied we need to yield
+         * periodically with no pending I/O so that qemu_aio_flush() returns.
+         * We do so every SLICE_TIME nanoseconds, or when there is an error,
+         * or when the source is clean, whichever comes first.
+         */
+        if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
+            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+            if (s->in_flight > 0) {
+                trace_mirror_yield(s, s->in_flight, cnt);
+                qemu_coroutine_yield();
+                continue;
+            } else if (cnt != 0) {
+                mirror_iteration(s);
+                continue;
             }
-            cnt = bdrv_get_dirty_count(bs);
         }
 
         should_complete = false;
-        if (cnt == 0) {
+        if (s->in_flight == 0 && cnt == 0) {
             trace_mirror_before_flush(s);
             ret = bdrv_flush(s->target);
             if (ret < 0) {
@@ -246,15 +323,12 @@ static void coroutine_fn mirror_run(void *opaque)
                 delay_ns = 0;
             }
 
-            /* Note that even when no rate limit is applied we need to yield
-             * with no pending I/O here so that bdrv_drain_all() returns.
-             */
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
             if (block_job_is_cancelled(&s->common)) {
                 break;
             }
         } else if (!should_complete) {
-            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         } else if (cnt == 0) {
             /* The two disks are in sync.  Exit and report successful
@@ -264,9 +338,20 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.cancelled = false;
             break;
         }
+        last_pause_ns = qemu_get_clock_ns(rt_clock);
     }
 
 immediate_exit:
+    if (s->in_flight > 0) {
+        /* We get here only if something went wrong.  Either the job failed,
+         * or it was cancelled prematurely so that we do not guarantee that
+         * the target is a copy of the source.
+         */
+        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
+        mirror_drain(s);
+    }
+
+    assert(s->in_flight == 0);
     qemu_vfree(s->buf);
     g_free(s->cow_bitmap);
     bdrv_set_dirty_tracking(bs, 0);
diff --git a/trace-events b/trace-events
index ffa2756..43e6b73 100644
--- a/trace-events
+++ b/trace-events
@@ -85,6 +85,8 @@ mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
 mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
 mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
+mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
+mirror_yield(void *s, int64_t cnt, int in_flight) "s %p dirty count %"PRId64" in_flight %d"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
commit eee13dfe302833944d1176677d12a6ea421a94ea
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:46 2013 +0100

    mirror: allow customizing the granularity
    
    The desired granularity may be very different depending on the kind of
    operation (e.g. continuous replication vs. collapse-to-raw) and whether
    the VM is expected to perform lots of I/O while mirroring is in progress.
    
    Allow the user to customize it, while providing a sane default so that
    in general there will be no extra allocated space in the target compared
    to the source.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index e425927..0fecb40 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -17,9 +17,6 @@
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
 
-#define BLOCK_SIZE                       (1 << 20)
-#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)
-
 #define SLICE_TIME 100000000ULL /* ns */
 
 typedef struct MirrorBlockJob {
@@ -31,6 +28,7 @@ typedef struct MirrorBlockJob {
     bool synced;
     bool should_complete;
     int64_t sector_num;
+    int64_t granularity;
     size_t buf_size;
     unsigned long *cow_bitmap;
     HBitmapIter hbi;
@@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
     BlockDriverState *source = s->common.bs;
     BlockDriverState *target = s->target;
     QEMUIOVector qiov;
-    int ret, nb_sectors;
+    int ret, nb_sectors, sectors_per_chunk;
     int64_t end, sector_num, chunk_num;
     struct iovec iov;
 
@@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
      * is very large, we need to do COW ourselves.  The first time a cluster is
      * copied, copy it entirely.
      *
-     * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
-     * powers of two, the number of sectors to copy cannot exceed one cluster.
+     * Because both the granularity and the cluster size are powers of two, the
+     * number of sectors to copy cannot exceed one cluster.
      */
     sector_num = s->sector_num;
-    nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
-    chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
+    chunk_num = sector_num / sectors_per_chunk;
     if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
         trace_mirror_cow(s, sector_num);
         bdrv_round_to_clusters(s->target,
-                               sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+                               sector_num, sectors_per_chunk,
                                &sector_num, &nb_sectors);
     }
 
@@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
         goto fail;
     }
     if (s->cow_bitmap) {
-        bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
-                   nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+        bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
+                   nb_sectors / sectors_per_chunk);
     }
     return 0;
 
@@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
-    int64_t sector_num, end, length;
+    int64_t sector_num, end, sectors_per_chunk, length;
     BlockDriverInfo bdi;
     char backing_filename[1024];
     int ret = 0;
@@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
                               sizeof(backing_filename));
     if (backing_filename[0] && !s->target->backing_hd) {
         bdrv_get_info(s->target, &bdi);
-        if (s->buf_size < bdi.cluster_size) {
+        if (s->granularity < bdi.cluster_size) {
             s->buf_size = bdi.cluster_size;
-            length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+            length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
             s->cow_bitmap = bitmap_new(length);
         }
     }
 
     end = s->common.len >> BDRV_SECTOR_BITS;
     s->buf = qemu_blockalign(bs, s->buf_size);
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
 
     if (s->mode != MIRROR_SYNC_MODE_NONE) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
         BlockDriverState *base;
         base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
         for (sector_num = 0; sector_num < end; ) {
-            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+            int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
             ret = bdrv_co_is_allocated_above(bs, base,
                                              sector_num, next - sector_num, &n);
 
@@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
 
             if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+                delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
             } else {
                 delay_ns = 0;
             }
@@ -332,7 +331,7 @@ static BlockJobType mirror_job_type = {
 };
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode mode,
+                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
@@ -340,6 +339,20 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
 {
     MirrorBlockJob *s;
 
+    if (granularity == 0) {
+        /* Choose the default granularity based on the target file's cluster
+         * size, clamped between 4k and 64k.  */
+        BlockDriverInfo bdi;
+        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
+            granularity = MAX(4096, bdi.cluster_size);
+            granularity = MIN(65536, granularity);
+        } else {
+            granularity = 65536;
+        }
+    }
+
+    assert ((granularity & (granularity - 1)) == 0);
+
     if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
          on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
         !bdrv_iostatus_is_enabled(bs)) {
@@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->on_target_error = on_target_error;
     s->target = target;
     s->mode = mode;
-    s->buf_size = BLOCK_SIZE;
+    s->granularity = granularity;
+    s->buf_size = granularity;
 
-    bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
+    bdrv_set_dirty_tracking(bs, granularity);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
     bdrv_iostatus_enable(s->target);
diff --git a/blockdev.c b/blockdev.c
index 1eb62b6..07fd327 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_granularity, uint32_t granularity,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target,
     if (!has_mode) {
         mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
     }
+    if (!has_granularity) {
+        granularity = 0;
+    }
+    if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
+        error_set(errp, QERR_INVALID_PARAMETER, device);
+        return;
+    }
+    if (granularity & (granularity - 1)) {
+        error_set(errp, QERR_INVALID_PARAMETER, device);
+        return;
+    }
 
     bs = bdrv_find(device);
     if (!bs) {
@@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    mirror_start(bs, target_bs, speed, granularity, sync,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_delete(target_bs);
diff --git a/hmp.c b/hmp.c
index c7b6ba0..0f3347d 100644
--- a/hmp.c
+++ b/hmp.c
@@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
 
     qmp_drive_mirror(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0,
+                     true, mode, false, 0, false, 0,
                      false, 0, false, 0, &errp);
     hmp_handle_error(mon, &errp);
 }
diff --git a/include/block/block_int.h b/include/block/block_int.h
index b81c061..1165339 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs: Block device to operate on.
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
  * @mode: Whether to collapse all images in the chain to the target.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
@@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @bs will be switched to read from @target.
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode mode,
+                  int64_t speed, int64_t granularity, MirrorSyncMode mode,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
diff --git a/qapi-schema.json b/qapi-schema.json
index ce4f901..fd5ec93 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1636,6 +1636,11 @@
 #        (all the disk, only the sectors allocated in the topmost image, or
 #        only new I/O).
 #
+# @granularity: #optional granularity of the dirty bitmap, default is 64K
+#               if the image format doesn't have clusters, else the cluster
+#               size clamped between 4K and 64K.  Must be a power of 2
+#               between 512 and 64M (since 1.4).
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
@@ -1652,7 +1657,8 @@
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int', '*on-source-error': 'BlockdevOnError',
+            '*speed': 'int', '*granularity': 'uint32',
+            '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
 ##
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cbf1280..835ea26 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -938,7 +938,8 @@ EQMP
     {
         .name       = "drive-mirror",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "on-source-error:s?,on-target-error:s?,"
+                      "granularity:i?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
     },
 
@@ -962,6 +963,7 @@ Arguments:
   file/device (NewImageMode, optional, default 'absolute-paths')
 - "speed": maximum speed of the streaming job, in bytes per second
   (json-int)
+- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
   allocated in the topmost image, or "none" to only replicate new I/O
@@ -971,6 +973,10 @@ Arguments:
 - "on-target-error": the action to take on an error on the target
   (BlockdevOnError, default 'report')
 
+The default value of the granularity is the image cluster size clamped
+between 4096 and 65536, if the image format defines one.  If the format
+does not define a cluster size, the default value of the granularity
+is 65536.
 
 
 Example:
commit 50717e941b9f306a45292621999eeafbaa954418
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:45 2013 +0100

    block: allow customizing the granularity of the dirty bitmap
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block-migration.c b/block-migration.c
index 9d0b037..9ac7de6 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -23,7 +23,8 @@
 #include "sysemu/blockdev.h"
 #include <assert.h>
 
-#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
+#define BLOCK_SIZE                       (1 << 20)
+#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)
 
 #define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
 #define BLK_MIG_FLAG_EOS                0x02
@@ -254,7 +255,7 @@ static void set_dirty_tracking(int enable)
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        bdrv_set_dirty_tracking(bmds->bs, enable);
+        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
     }
 }
 
diff --git a/block.c b/block.c
index a274544..ba67c0d 100644
--- a/block.c
+++ b/block.c
@@ -2833,6 +2833,8 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs)
         info->has_dirty = true;
         info->dirty = g_malloc0(sizeof(*info->dirty));
         info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+        info->dirty->granularity =
+            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
     }
 
     if (bs->drv) {
@@ -4299,16 +4301,17 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }
 
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
 {
     int64_t bitmap_size;
 
-    if (enable) {
-        if (!bs->dirty_bitmap) {
-            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
-            bs->dirty_bitmap = hbitmap_alloc(bitmap_size,
-                                             BDRV_LOG_SECTORS_PER_DIRTY_CHUNK);
-        }
+    assert((granularity & (granularity - 1)) == 0);
+
+    if (granularity) {
+        granularity >>= BDRV_SECTOR_BITS;
+        assert(!bs->dirty_bitmap);
+        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
     } else {
         if (bs->dirty_bitmap) {
             hbitmap_free(bs->dirty_bitmap);
diff --git a/block/mirror.c b/block/mirror.c
index 7884b3b..e425927 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -17,14 +17,8 @@
 #include "qemu/ratelimit.h"
 #include "qemu/bitmap.h"
 
-enum {
-    /*
-     * Size of data buffer for populating the image file.  This should be large
-     * enough to process multiple clusters in a single call, so that populating
-     * contiguous regions of the image is efficient.
-     */
-    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
-};
+#define BLOCK_SIZE                       (1 << 20)
+#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)
 
 #define SLICE_TIME 100000000ULL /* ns */
 
@@ -276,7 +270,7 @@ static void coroutine_fn mirror_run(void *opaque)
 immediate_exit:
     qemu_vfree(s->buf);
     g_free(s->cow_bitmap);
-    bdrv_set_dirty_tracking(bs, false);
+    bdrv_set_dirty_tracking(bs, 0);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
         if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
@@ -364,7 +358,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->mode = mode;
     s->buf_size = BLOCK_SIZE;
 
-    bdrv_set_dirty_tracking(bs, true);
+    bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
     bdrv_iostatus_enable(s->target);
diff --git a/include/block/block.h b/include/block/block.h
index 9ee9068..5c3b911 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -355,11 +355,8 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
-#define BDRV_SECTORS_PER_DIRTY_CHUNK     (1 << BDRV_LOG_SECTORS_PER_DIRTY_CHUNK)
-#define BDRV_LOG_SECTORS_PER_DIRTY_CHUNK 11
-
 struct HBitmapIter;
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..ce4f901 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -667,10 +667,12 @@
 #
 # @count: number of dirty bytes according to the dirty bitmap
 #
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
 # Since: 1.3
 ##
 { 'type': 'BlockDirtyInfo',
-  'data': {'count': 'int'} }
+  'data': {'count': 'int', 'granularity': 'int'} }
 
 ##
 # @BlockInfo:
commit acc906c6c5d5745fe7a3a2ed1bb5f0b1d6d1f21b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:44 2013 +0100

    block: return count of dirty sectors, not chunks
    
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block-migration.c b/block-migration.c
index 6acf3e1..9d0b037 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -478,7 +478,7 @@ static int64_t get_remaining_dirty(void)
         dirty += bdrv_get_dirty_count(bmds->bs);
     }
 
-    return dirty * BLOCK_SIZE;
+    return dirty << BDRV_SECTOR_BITS;
 }
 
 static void blk_mig_cleanup(void)
diff --git a/block.c b/block.c
index 4a4ab16..a274544 100644
--- a/block.c
+++ b/block.c
@@ -2832,8 +2832,7 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs)
     if (bs->dirty_bitmap) {
         info->has_dirty = true;
         info->dirty = g_malloc0(sizeof(*info->dirty));
-        info->dirty->count = bdrv_get_dirty_count(bs) *
-            BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
+        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
     }
 
     if (bs->drv) {
@@ -4347,7 +4346,7 @@ void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
 {
     if (bs->dirty_bitmap) {
-        return hbitmap_count(bs->dirty_bitmap) >> BDRV_LOG_SECTORS_PER_DIRTY_CHUNK;
+        return hbitmap_count(bs->dirty_bitmap);
     } else {
         return 0;
     }
diff --git a/block/mirror.c b/block/mirror.c
index 307bcf1..7884b3b 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -245,7 +245,7 @@ static void coroutine_fn mirror_run(void *opaque)
         trace_mirror_before_sleep(s, cnt, s->synced);
         if (!s->synced) {
             /* Publish progress */
-            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+            s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
 
             if (s->common.speed) {
                 delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
commit b812f6719c21921a819709098dc018ed151c999b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:43 2013 +0100

    mirror: perform COW if the cluster size is bigger than the granularity
    
    When mirroring runs, the backing files for the target may not yet be
    ready.  However, this means that a copy-on-write operation on the target
    would fill the missing sectors with zeros.  Copy-on-write only happens
    if the granularity of the dirty bitmap is smaller than the cluster size
    (and only for clusters that are allocated in the source after the job
    has started copying).  So far, the granularity was fixed to 1MB; to avoid
    the problem we detected the situation and required the backing files to
    be available in that case only.
    
    However, we want to lower the granularity for efficiency, so we need
    a better solution.  The solution is to always copy a whole cluster the
    first time it is touched.  The code keeps a bitmap of clusters that
    have already been allocated by the mirroring job, and only does "manual"
    copy-on-write if the chunk being copied is zero in the bitmap.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 20cb1e7..307bcf1 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,6 +15,7 @@
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "qemu/ratelimit.h"
+#include "qemu/bitmap.h"
 
 enum {
     /*
@@ -36,6 +37,8 @@ typedef struct MirrorBlockJob {
     bool synced;
     bool should_complete;
     int64_t sector_num;
+    size_t buf_size;
+    unsigned long *cow_bitmap;
     HBitmapIter hbi;
     uint8_t *buf;
 } MirrorBlockJob;
@@ -60,7 +63,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
     BlockDriverState *target = s->target;
     QEMUIOVector qiov;
     int ret, nb_sectors;
-    int64_t end;
+    int64_t end, sector_num, chunk_num;
     struct iovec iov;
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
@@ -71,32 +74,53 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
         assert(s->sector_num >= 0);
     }
 
+    /* If we have no backing file yet in the destination, and the cluster size
+     * is very large, we need to do COW ourselves.  The first time a cluster is
+     * copied, copy it entirely.
+     *
+     * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
+     * powers of two, the number of sectors to copy cannot exceed one cluster.
+     */
+    sector_num = s->sector_num;
+    nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
+    chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
+    if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
+        trace_mirror_cow(s, sector_num);
+        bdrv_round_to_clusters(s->target,
+                               sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
+                               &sector_num, &nb_sectors);
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
-    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+    nb_sectors = MIN(nb_sectors, end - sector_num);
+    bdrv_reset_dirty(source, sector_num, nb_sectors);
 
     /* Copy the dirty cluster.  */
     iov.iov_base = s->buf;
     iov.iov_len  = nb_sectors * 512;
     qemu_iovec_init_external(&qiov, &iov, 1);
 
-    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
-    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+    trace_mirror_one_iteration(s, sector_num, nb_sectors);
+    ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
     if (ret < 0) {
         *p_action = mirror_error_action(s, true, -ret);
         goto fail;
     }
-    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+    ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
     if (ret < 0) {
         *p_action = mirror_error_action(s, false, -ret);
         s->synced = false;
         goto fail;
     }
+    if (s->cow_bitmap) {
+        bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
+                   nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
+    }
     return 0;
 
 fail:
     /* Try again later.  */
-    bdrv_set_dirty(source, s->sector_num, nb_sectors);
+    bdrv_set_dirty(source, sector_num, nb_sectors);
     return ret;
 }
 
@@ -104,7 +128,9 @@ static void coroutine_fn mirror_run(void *opaque)
 {
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
-    int64_t sector_num, end;
+    int64_t sector_num, end, length;
+    BlockDriverInfo bdi;
+    char backing_filename[1024];
     int ret = 0;
     int n;
 
@@ -118,8 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
         return;
     }
 
+    /* If we have no backing file yet in the destination, we cannot let
+     * the destination do COW.  Instead, we copy sectors around the
+     * dirty data if needed.  We need a bitmap to do that.
+     */
+    bdrv_get_backing_filename(s->target, backing_filename,
+                              sizeof(backing_filename));
+    if (backing_filename[0] && !s->target->backing_hd) {
+        bdrv_get_info(s->target, &bdi);
+        if (s->buf_size < bdi.cluster_size) {
+            s->buf_size = bdi.cluster_size;
+            length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
+            s->cow_bitmap = bitmap_new(length);
+        }
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+    s->buf = qemu_blockalign(bs, s->buf_size);
 
     if (s->mode != MIRROR_SYNC_MODE_NONE) {
         /* First part, loop on the sectors and initialize the dirty bitmap.  */
@@ -234,6 +275,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
 immediate_exit:
     qemu_vfree(s->buf);
+    g_free(s->cow_bitmap);
     bdrv_set_dirty_tracking(bs, false);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
@@ -320,6 +362,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->on_target_error = on_target_error;
     s->target = target;
     s->mode = mode;
+    s->buf_size = BLOCK_SIZE;
+
     bdrv_set_dirty_tracking(bs, true);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
diff --git a/blockdev.c b/blockdev.c
index 9126587..1eb62b6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1197,7 +1197,6 @@ void qmp_drive_mirror(const char *device, const char *target,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
 {
-    BlockDriverInfo bdi;
     BlockDriverState *bs;
     BlockDriverState *source, *target_bs;
     BlockDriver *proto_drv;
@@ -1288,6 +1287,9 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
+    /* Mirroring takes care of copy-on-write using the source's backing
+     * file.
+     */
     target_bs = bdrv_new("");
     ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
 
@@ -1297,17 +1299,6 @@ void qmp_drive_mirror(const char *device, const char *target,
         return;
     }
 
-    /* We need a backing file if we will copy parts of a cluster.  */
-    if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
-        bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
-        ret = bdrv_open_backing_file(target_bs);
-        if (ret < 0) {
-            bdrv_delete(target_bs);
-            error_set(errp, QERR_OPEN_FILE_FAILED, target);
-            return;
-        }
-    }
-
     mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index c6eb851..a1299b3 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -292,6 +292,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
         self.assertTrue(self.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
+    def test_large_cluster(self):
+        self.assert_no_active_mirrors()
+
+        # qemu-img create fails if the image is not there
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
+                        %(TestMirrorNoBacking.image_len), target_backing_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
+                        % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
+        os.remove(target_backing_img)
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
 class TestReadErrors(ImageMirroringTestCase):
     image_len = 2 * 1024 * 1024 # MB
 
@@ -330,6 +351,9 @@ new_state = "1"
                  '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
                        % (self.blkdebug_file, backing_img),
                  test_img)
+        # Write something for tests that use sync='top'
+        qemu_io('-c', 'write %d 512' % (self.MIRROR_GRANULARITY + 65536),
+                        test_img)
         self.vm = iotests.VM().add_drive(test_img)
         self.vm.launch()
 
@@ -383,6 +407,32 @@ new_state = "1"
         self.complete_and_wait()
         self.vm.shutdown()
 
+    def test_large_cluster(self):
+        self.assert_no_active_mirrors()
+
+        # Test COW into the target image.  The first half of the
+        # cluster at MIRROR_GRANULARITY has to be copied from
+        # backing_img, even though sync='top'.
+        qemu_img('create', '-f', iotests.imgfmt, '-ocluster_size=131072,backing_file=%s' %(backing_img), target_img)
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='top',
+                             on_source_error='ignore',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        event = self.vm.get_qmp_event(wait=True)
+        self.assertEquals(event['event'], 'BLOCK_JOB_ERROR')
+        self.assert_qmp(event, 'data/device', 'drive0')
+        self.assert_qmp(event, 'data/operation', 'read')
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/paused', False)
+        self.complete_and_wait()
+        self.vm.shutdown()
+
+        # Detach blkdebug to compare images successfully
+        qemu_img('rebase', '-f', iotests.imgfmt, '-u', '-b', backing_img, test_img)
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
     def test_stop_read(self):
         self.assert_no_active_mirrors()
 
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 71009c2..3a89159 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-..................
+....................
 ----------------------------------------------------------------------
-Ran 18 tests
+Ran 20 tests
 
 OK
diff --git a/trace-events b/trace-events
index 61ed349..ffa2756 100644
--- a/trace-events
+++ b/trace-events
@@ -84,6 +84,7 @@ mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
 mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
commit 343bded4ecfc467012e2ab675da75749f1d90f70
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:42 2013 +0100

    block: make round_to_clusters public
    
    This is needed in the following patch.
    
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index e0ff736..4a4ab16 100644
--- a/block.c
+++ b/block.c
@@ -1673,10 +1673,10 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
 /**
  * Round a region to cluster boundaries
  */
-static void round_to_clusters(BlockDriverState *bs,
-                              int64_t sector_num, int nb_sectors,
-                              int64_t *cluster_sector_num,
-                              int *cluster_nb_sectors)
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors)
 {
     BlockDriverInfo bdi;
 
@@ -1718,8 +1718,8 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
      * CoR read and write operations are atomic and guest writes cannot
      * interleave between them.
      */
-    round_to_clusters(bs, sector_num, nb_sectors,
-                      &cluster_sector_num, &cluster_nb_sectors);
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
 
     do {
         retry = false;
@@ -2185,8 +2185,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.
      */
-    round_to_clusters(bs, sector_num, nb_sectors,
-                      &cluster_sector_num, &cluster_nb_sectors);
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
 
     trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                    cluster_sector_num, cluster_nb_sectors);
diff --git a/include/block/block.h b/include/block/block.h
index 678fc60..9ee9068 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -309,6 +309,10 @@ int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors);
 
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
 void bdrv_get_backing_filename(BlockDriverState *bs,
commit 8f0720ecbc3677e13fc7531588fc3831cc972ee4
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:41 2013 +0100

    block: implement dirty bitmap using HBitmap
    
    This actually uses the dirty bitmap in the block layer, and converts
    mirroring to use an HBitmapIter.
    
    Reviewed-by: Laszlo Ersek <lersek at redhat.com> (except block/mirror.c parts)
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 6fa7c90..e0ff736 100644
--- a/block.c
+++ b/block.c
@@ -1286,7 +1286,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->iostatus           = bs_src->iostatus;
 
     /* dirty bitmap */
-    bs_dest->dirty_count        = bs_src->dirty_count;
     bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
 
     /* job */
@@ -2035,36 +2034,6 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
-#define BITS_PER_LONG  (sizeof(unsigned long) * 8)
-
-static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int dirty)
-{
-    int64_t start, end;
-    unsigned long val, idx, bit;
-
-    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
-    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    for (; start <= end; start++) {
-        idx = start / BITS_PER_LONG;
-        bit = start % BITS_PER_LONG;
-        val = bs->dirty_bitmap[idx];
-        if (dirty) {
-            if (!(val & (1UL << bit))) {
-                bs->dirty_count++;
-                val |= 1UL << bit;
-            }
-        } else {
-            if (val & (1UL << bit)) {
-                bs->dirty_count--;
-                val &= ~(1UL << bit);
-            }
-        }
-        bs->dirty_bitmap[idx] = val;
-    }
-}
-
 /* Return < 0 if error. Important errors are:
   -EIO         generic I/O error (may happen for all errors)
   -ENOMEDIUM   No media inserted.
@@ -4173,7 +4142,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
     }
 
     if (bs->dirty_bitmap) {
-        set_dirty_bitmap(bs, sector_num, nb_sectors, 0);
+        bdrv_reset_dirty(bs, sector_num, nb_sectors);
     }
 
     if (bs->drv->bdrv_co_discard) {
@@ -4335,18 +4304,15 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
 {
     int64_t bitmap_size;
 
-    bs->dirty_count = 0;
     if (enable) {
         if (!bs->dirty_bitmap) {
-            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
-                    BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
-            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
-
-            bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
+            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+            bs->dirty_bitmap = hbitmap_alloc(bitmap_size,
+                                             BDRV_LOG_SECTORS_PER_DIRTY_CHUNK);
         }
     } else {
         if (bs->dirty_bitmap) {
-            g_free(bs->dirty_bitmap);
+            hbitmap_free(bs->dirty_bitmap);
             bs->dirty_bitmap = NULL;
         }
     }
@@ -4354,67 +4320,37 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
 
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
 {
-    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    if (bs->dirty_bitmap &&
-        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
-        return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] &
-            (1UL << (chunk % BITS_PER_LONG)));
+    if (bs->dirty_bitmap) {
+        return hbitmap_get(bs->dirty_bitmap, sector);
     } else {
         return 0;
     }
 }
 
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
 {
-    int64_t chunk;
-    int bit, elem;
-
-    /* Avoid an infinite loop.  */
-    assert(bs->dirty_count > 0);
-
-    sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
-    chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
-    QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
-    elem = chunk / BITS_PER_LONG;
-    bit = chunk % BITS_PER_LONG;
-    for (;;) {
-        if (sector >= bs->total_sectors) {
-            sector = 0;
-            bit = elem = 0;
-        }
-        if (bit == 0 && bs->dirty_bitmap[elem] == 0) {
-            sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
-            elem++;
-        } else {
-            if (bs->dirty_bitmap[elem] & (1UL << bit)) {
-                return sector;
-            }
-            sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
-            if (++bit == BITS_PER_LONG) {
-                bit = 0;
-                elem++;
-            }
-        }
-    }
+    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
 }
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                     int nr_sectors)
 {
-    set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
+    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
 }
 
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                       int nr_sectors)
 {
-    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
+    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
 }
 
 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
 {
-    return bs->dirty_count;
+    if (bs->dirty_bitmap) {
+        return hbitmap_count(bs->dirty_bitmap) >> BDRV_LOG_SECTORS_PER_DIRTY_CHUNK;
+    } else {
+        return 0;
+    }
 }
 
 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
diff --git a/block/mirror.c b/block/mirror.c
index 6180aa3..20cb1e7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -36,6 +36,7 @@ typedef struct MirrorBlockJob {
     bool synced;
     bool should_complete;
     int64_t sector_num;
+    HBitmapIter hbi;
     uint8_t *buf;
 } MirrorBlockJob;
 
@@ -62,8 +63,15 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
     int64_t end;
     struct iovec iov;
 
+    s->sector_num = hbitmap_iter_next(&s->hbi);
+    if (s->sector_num < 0) {
+        bdrv_dirty_iter_init(source, &s->hbi);
+        s->sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
+        assert(s->sector_num >= 0);
+    }
+
     end = s->common.len >> BDRV_SECTOR_BITS;
-    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
     nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
     bdrv_reset_dirty(source, s->sector_num, nb_sectors);
 
@@ -136,7 +144,7 @@ static void coroutine_fn mirror_run(void *opaque)
         }
     }
 
-    s->sector_num = -1;
+    bdrv_dirty_iter_init(bs, &s->hbi);
     for (;;) {
         uint64_t delay_ns;
         int64_t cnt;
diff --git a/include/block/block.h b/include/block/block.h
index ffd1936..678fc60 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -351,13 +351,15 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
-#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048
+#define BDRV_SECTORS_PER_DIRTY_CHUNK     (1 << BDRV_LOG_SECTORS_PER_DIRTY_CHUNK)
+#define BDRV_LOG_SECTORS_PER_DIRTY_CHUNK 11
 
+struct HBitmapIter;
 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f83ffb8..b81c061 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -32,6 +32,7 @@
 #include "qapi-types.h"
 #include "qapi/qmp/qerror.h"
 #include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
 
 #define BLOCK_FLAG_ENCRYPT          1
 #define BLOCK_FLAG_COMPAT6          4
@@ -275,8 +276,7 @@ struct BlockDriverState {
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
-    unsigned long *dirty_bitmap;
-    int64_t dirty_count;
+    HBitmap *dirty_bitmap;
     int in_use; /* users other than guest access, eg. block migration */
     QTAILQ_ENTRY(BlockDriverState) list;
 
diff --git a/trace-events b/trace-events
index 732cb12..61ed349 100644
--- a/trace-events
+++ b/trace-events
@@ -79,6 +79,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 
 # block/mirror.c
 mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
commit e7c033c3fa22a1e42d9ba57fed6ddecfbce3a01c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:40 2013 +0100

    add hierarchical bitmap data type and test cases
    
    HBitmap provides an array of bits.  The bits are stored as usual in an
    array of unsigned longs, but HBitmap is also optimized to provide fast
    iteration over set bits; going from one bit to the next is O(logB n)
    worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
    that the number of levels is in fact fixed.
    
    In order to do this, it stacks multiple bitmaps with progressively coarser
    granularity; in all levels except the last, bit N is set iff the N-th
    unsigned long is nonzero in the immediately next level.  When iteration
    completes on the last level it can examine the 2nd-last level to quickly
    skip entire words, and even do so recursively to skip blocks of 64 words or
    powers thereof (32 on 32-bit machines).
    
    Given an index in the bitmap, it can be split into groups of bits like
    this (for the 64-bit case):
    
         bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
         bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
         bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
    
    So it is easy to move up simply by shifting the index right by
    log2(BITS_PER_LONG) bits.  To move down, you shift the index left
    similarly, and add the word index within the group.  Iteration uses
    ffs (find first set bit) to find the next word to examine; this
    operation can be done in constant time in most current architectures.
    
    Setting or clearing a range of m bits on all levels, the work to perform
    is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
    
    When iterating on a bitmap, each bit (on any level) is only visited
    once.  Hence, the total cost of visiting a bitmap with m bits in it is
    the number of bits that are set in all bitmaps.  Unless the bitmap is
    extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
    cost of advancing from one bit to the next is usually constant.
    
    Reviewed-by: Laszlo Ersek <lersek at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
new file mode 100644
index 0000000..7ddfb66
--- /dev/null
+++ b/include/qemu/hbitmap.h
@@ -0,0 +1,207 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef HBITMAP_H
+#define HBITMAP_H 1
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "bitops.h"
+
+typedef struct HBitmap HBitmap;
+typedef struct HBitmapIter HBitmapIter;
+
+#define BITS_PER_LEVEL         (BITS_PER_LONG == 32 ? 5 : 6)
+
+/* For 32-bit, the largest that fits in a 4 GiB address space.
+ * For 64-bit, the number of sectors in 1 PiB.  Good luck, in
+ * either case... :)
+ */
+#define HBITMAP_LOG_MAX_SIZE   (BITS_PER_LONG == 32 ? 34 : 41)
+
+/* We need to place a sentinel in level 0 to speed up iteration.  Thus,
+ * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL.  The
+ * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE
+ * is an exact multiple of BITS_PER_LEVEL.
+ */
+#define HBITMAP_LEVELS         ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1)
+
+struct HBitmapIter {
+    const HBitmap *hb;
+
+    /* Copied from hb for access in the inline functions (hb is opaque).  */
+    int granularity;
+
+    /* Entry offset into the last-level array of longs.  */
+    size_t pos;
+
+    /* The currently-active path in the tree.  Each item of cur[i] stores
+     * the bits (i.e. the subtrees) yet to be processed under that node.
+     */
+    unsigned long cur[HBITMAP_LEVELS];
+};
+
+/**
+ * hbitmap_alloc:
+ * @size: Number of bits in the bitmap.
+ * @granularity: Granularity of the bitmap.  Aligned groups of 2^@granularity
+ * bits will be represented by a single bit.  Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affects the entire group; iteration will only visit the first
+ * bit of each group.
+ *
+ * Allocate a new HBitmap.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+
+/**
+ * hbitmap_empty:
+ * @hb: HBitmap to operate on.
+ *
+ * Return whether the bitmap is empty.
+ */
+bool hbitmap_empty(const HBitmap *hb);
+
+/**
+ * hbitmap_granularity:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the granularity of the HBitmap.
+ */
+int hbitmap_granularity(const HBitmap *hb);
+
+/**
+ * hbitmap_count:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the number of bits set in the HBitmap.
+ */
+uint64_t hbitmap_count(const HBitmap *hb);
+
+/**
+ * hbitmap_set:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to set (0-based).
+ * @count: Number of bits to set.
+ *
+ * Set a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_reset:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to reset (0-based).
+ * @count: Number of bits to reset.
+ *
+ * Reset a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_get:
+ * @hb: HBitmap to operate on.
+ * @item: Bit to query (0-based).
+ *
+ * Return whether the @item-th bit in an HBitmap is set.
+ */
+bool hbitmap_get(const HBitmap *hb, uint64_t item);
+
+/**
+ * hbitmap_free:
+ * @hb: HBitmap to operate on.
+ *
+ * Free an HBitmap and all of its associated memory.
+ */
+void hbitmap_free(HBitmap *hb);
+
+/**
+ * hbitmap_iter_init:
+ * @hbi: HBitmapIter to initialize.
+ * @hb: HBitmap to iterate on.
+ * @first: First bit to visit (0-based).
+ *
+ * Set up @hbi to iterate on the HBitmap @hb.  hbitmap_iter_next will return
+ * the lowest-numbered bit that is set in @hb, starting at @first.
+ *
+ * Concurrent setting of bits is acceptable, and will at worst cause the
+ * iteration to miss some of those bits.  Resetting bits before the current
+ * position of the iterator is also okay.  However, concurrent resetting of
+ * bits can lead to unexpected behavior if the iterator has not yet reached
+ * those bits.
+ */
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
+
+/* hbitmap_iter_skip_words:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
+
+/**
+ * hbitmap_iter_next:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Return the next bit that is set in @hbi's associated HBitmap,
+ * or -1 if all remaining bits are zero.
+ */
+static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+    int64_t item;
+
+    if (cur == 0) {
+        cur = hbitmap_iter_skip_words(hbi);
+        if (cur == 0) {
+            return -1;
+        }
+    }
+
+    /* The next call will resume work from the next bit.  */
+    hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+    item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ffsl(cur) - 1;
+
+    return item << hbi->granularity;
+}
+
+/**
+ * hbitmap_iter_next_word:
+ * @hbi: HBitmapIter to operate on.
+ * @p_cur: Location where to store the next non-zero word.
+ *
+ * Return the index of the next nonzero word that is set in @hbi's
+ * associated HBitmap, and set *p_cur to the content of that word
+ * (bits before the index that was passed to hbitmap_iter_init are
+ * trimmed on the first call).  Return -1, and set *p_cur to zero,
+ * if all remaining words are zero.
+ */
+static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
+{
+    unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+
+    if (cur == 0) {
+        cur = hbitmap_iter_skip_words(hbi);
+        if (cur == 0) {
+            *p_cur = 0;
+            return -1;
+        }
+    }
+
+    /* The next call will resume work from the next word.  */
+    hbi->cur[HBITMAP_LEVELS - 1] = 0;
+    *p_cur = cur;
+    return hbi->pos;
+}
+
+
+#endif
diff --git a/tests/Makefile b/tests/Makefile
index d86e95a..b3a6d86 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -45,6 +45,8 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
 gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
 check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
+gcov-files-test-hbitmap-y = util/hbitmap.c
+check-unit-y += tests/test-hbitmap$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -86,6 +88,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
 tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
 tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
+tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
 
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
new file mode 100644
index 0000000..fcc6a00
--- /dev/null
+++ b/tests/test-hbitmap.c
@@ -0,0 +1,408 @@
+/*
+ * Hierarchical bitmap unit-tests.
+ *
+ * Copyright (C) 2012 Red Hat Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <stdarg.h>
+#include "qemu/hbitmap.h"
+
+#define LOG_BITS_PER_LONG          (BITS_PER_LONG == 32 ? 5 : 6)
+
+#define L1                         BITS_PER_LONG
+#define L2                         (BITS_PER_LONG * L1)
+#define L3                         (BITS_PER_LONG * L2)
+
+typedef struct TestHBitmapData {
+    HBitmap       *hb;
+    unsigned long *bits;
+    size_t         size;
+    int            granularity;
+} TestHBitmapData;
+
+
+/* Check that the HBitmap and the shadow bitmap contain the same data,
+ * ignoring all bits before index "first".
+ */
+static void hbitmap_test_check(TestHBitmapData *data,
+                               uint64_t first)
+{
+    uint64_t count = 0;
+    size_t pos;
+    int bit;
+    HBitmapIter hbi;
+    int64_t i, next;
+
+    hbitmap_iter_init(&hbi, data->hb, first);
+
+    i = first;
+    for (;;) {
+        next = hbitmap_iter_next(&hbi);
+        if (next < 0) {
+            next = data->size;
+        }
+
+        while (i < next) {
+            pos = i >> LOG_BITS_PER_LONG;
+            bit = i & (BITS_PER_LONG - 1);
+            i++;
+            g_assert_cmpint(data->bits[pos] & (1UL << bit), ==, 0);
+        }
+
+        if (next == data->size) {
+            break;
+        }
+
+        pos = i >> LOG_BITS_PER_LONG;
+        bit = i & (BITS_PER_LONG - 1);
+        i++;
+        count++;
+        g_assert_cmpint(data->bits[pos] & (1UL << bit), !=, 0);
+    }
+
+    if (first == 0) {
+        g_assert_cmpint(count << data->granularity, ==, hbitmap_count(data->hb));
+    }
+}
+
+/* This is provided instead of a test setup function so that the sizes
+   are kept in the test functions (and not in main()) */
+static void hbitmap_test_init(TestHBitmapData *data,
+                              uint64_t size, int granularity)
+{
+    size_t n;
+    data->hb = hbitmap_alloc(size, granularity);
+
+    n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;
+    if (n == 0) {
+        n = 1;
+    }
+    data->bits = g_new0(unsigned long, n);
+    data->size = size;
+    data->granularity = granularity;
+    hbitmap_test_check(data, 0);
+}
+
+static void hbitmap_test_teardown(TestHBitmapData *data,
+                                  const void *unused)
+{
+    if (data->hb) {
+        hbitmap_free(data->hb);
+        data->hb = NULL;
+    }
+    if (data->bits) {
+        g_free(data->bits);
+        data->bits = NULL;
+    }
+}
+
+/* Set a range in the HBitmap and in the shadow "simple" bitmap.
+ * The two bitmaps are then tested against each other.
+ */
+static void hbitmap_test_set(TestHBitmapData *data,
+                             uint64_t first, uint64_t count)
+{
+    hbitmap_set(data->hb, first, count);
+    while (count-- != 0) {
+        size_t pos = first >> LOG_BITS_PER_LONG;
+        int bit = first & (BITS_PER_LONG - 1);
+        first++;
+
+        data->bits[pos] |= 1UL << bit;
+    }
+
+    if (data->granularity == 0) {
+        hbitmap_test_check(data, 0);
+    }
+}
+
+/* Reset a range in the HBitmap and in the shadow "simple" bitmap.
+ */
+static void hbitmap_test_reset(TestHBitmapData *data,
+                               uint64_t first, uint64_t count)
+{
+    hbitmap_reset(data->hb, first, count);
+    while (count-- != 0) {
+        size_t pos = first >> LOG_BITS_PER_LONG;
+        int bit = first & (BITS_PER_LONG - 1);
+        first++;
+
+        data->bits[pos] &= ~(1UL << bit);
+    }
+
+    if (data->granularity == 0) {
+        hbitmap_test_check(data, 0);
+    }
+}
+
+static void hbitmap_test_check_get(TestHBitmapData *data)
+{
+    uint64_t count = 0;
+    uint64_t i;
+
+    for (i = 0; i < data->size; i++) {
+        size_t pos = i >> LOG_BITS_PER_LONG;
+        int bit = i & (BITS_PER_LONG - 1);
+        unsigned long val = data->bits[pos] & (1UL << bit);
+        count += hbitmap_get(data->hb, i);
+        g_assert_cmpint(hbitmap_get(data->hb, i), ==, val != 0);
+    }
+    g_assert_cmpint(count, ==, hbitmap_count(data->hb));
+}
+
+static void test_hbitmap_zero(TestHBitmapData *data,
+                               const void *unused)
+{
+    hbitmap_test_init(data, 0, 0);
+}
+
+static void test_hbitmap_unaligned(TestHBitmapData *data,
+                                   const void *unused)
+{
+    hbitmap_test_init(data, L3 + 23, 0);
+    hbitmap_test_set(data, 0, 1);
+    hbitmap_test_set(data, L3 + 22, 1);
+}
+
+static void test_hbitmap_iter_empty(TestHBitmapData *data,
+                                    const void *unused)
+{
+    hbitmap_test_init(data, L1, 0);
+}
+
+static void test_hbitmap_iter_partial(TestHBitmapData *data,
+                                      const void *unused)
+{
+    hbitmap_test_init(data, L3, 0);
+    hbitmap_test_set(data, 0, L3);
+    hbitmap_test_check(data, 1);
+    hbitmap_test_check(data, L1 - 1);
+    hbitmap_test_check(data, L1);
+    hbitmap_test_check(data, L1 * 2 - 1);
+    hbitmap_test_check(data, L2 - 1);
+    hbitmap_test_check(data, L2);
+    hbitmap_test_check(data, L2 + 1);
+    hbitmap_test_check(data, L2 + L1);
+    hbitmap_test_check(data, L2 + L1 * 2 - 1);
+    hbitmap_test_check(data, L2 * 2 - 1);
+    hbitmap_test_check(data, L2 * 2);
+    hbitmap_test_check(data, L2 * 2 + 1);
+    hbitmap_test_check(data, L2 * 2 + L1);
+    hbitmap_test_check(data, L2 * 2 + L1 * 2 - 1);
+    hbitmap_test_check(data, L3 / 2);
+}
+
+static void test_hbitmap_iter_past(TestHBitmapData *data,
+                                    const void *unused)
+{
+    hbitmap_test_init(data, L3, 0);
+    hbitmap_test_set(data, 0, L3);
+    hbitmap_test_check(data, L3);
+}
+
+static void test_hbitmap_set_all(TestHBitmapData *data,
+                                 const void *unused)
+{
+    hbitmap_test_init(data, L3, 0);
+    hbitmap_test_set(data, 0, L3);
+}
+
+static void test_hbitmap_get_all(TestHBitmapData *data,
+                                 const void *unused)
+{
+    hbitmap_test_init(data, L3, 0);
+    hbitmap_test_set(data, 0, L3);
+    hbitmap_test_check_get(data);
+}
+
+static void test_hbitmap_get_some(TestHBitmapData *data,
+                                  const void *unused)
+{
+    hbitmap_test_init(data, 2 * L2, 0);
+    hbitmap_test_set(data, 10, 1);
+    hbitmap_test_check_get(data);
+    hbitmap_test_set(data, L1 - 1, 1);
+    hbitmap_test_check_get(data);
+    hbitmap_test_set(data, L1, 1);
+    hbitmap_test_check_get(data);
+    hbitmap_test_set(data, L2 - 1, 1);
+    hbitmap_test_check_get(data);
+    hbitmap_test_set(data, L2, 1);
+    hbitmap_test_check_get(data);
+}
+
+static void test_hbitmap_set_one(TestHBitmapData *data,
+                                 const void *unused)
+{
+    hbitmap_test_init(data, 2 * L2, 0);
+    hbitmap_test_set(data, 10, 1);
+    hbitmap_test_set(data, L1 - 1, 1);
+    hbitmap_test_set(data, L1, 1);
+    hbitmap_test_set(data, L2 - 1, 1);
+    hbitmap_test_set(data, L2, 1);
+}
+
+static void test_hbitmap_set_two_elem(TestHBitmapData *data,
+                                      const void *unused)
+{
+    hbitmap_test_init(data, 2 * L2, 0);
+    hbitmap_test_set(data, L1 - 1, 2);
+    hbitmap_test_set(data, L1 * 2 - 1, 4);
+    hbitmap_test_set(data, L1 * 4, L1 + 1);
+    hbitmap_test_set(data, L1 * 8 - 1, L1 + 1);
+    hbitmap_test_set(data, L2 - 1, 2);
+    hbitmap_test_set(data, L2 + L1 - 1, 8);
+    hbitmap_test_set(data, L2 + L1 * 4, L1 + 1);
+    hbitmap_test_set(data, L2 + L1 * 8 - 1, L1 + 1);
+}
+
+static void test_hbitmap_set(TestHBitmapData *data,
+                             const void *unused)
+{
+    hbitmap_test_init(data, L3 * 2, 0);
+    hbitmap_test_set(data, L1 - 1, L1 + 2);
+    hbitmap_test_set(data, L1 * 3 - 1, L1 + 2);
+    hbitmap_test_set(data, L1 * 5, L1 * 2 + 1);
+    hbitmap_test_set(data, L1 * 8 - 1, L1 * 2 + 1);
+    hbitmap_test_set(data, L2 - 1, L1 + 2);
+    hbitmap_test_set(data, L2 + L1 * 2 - 1, L1 + 2);
+    hbitmap_test_set(data, L2 + L1 * 4, L1 * 2 + 1);
+    hbitmap_test_set(data, L2 + L1 * 7 - 1, L1 * 2 + 1);
+    hbitmap_test_set(data, L2 * 2 - 1, L3 * 2 - L2 * 2);
+}
+
+static void test_hbitmap_set_twice(TestHBitmapData *data,
+                                   const void *unused)
+{
+    hbitmap_test_init(data, L1 * 3, 0);
+    hbitmap_test_set(data, 0, L1 * 3);
+    hbitmap_test_set(data, L1, 1);
+}
+
+static void test_hbitmap_set_overlap(TestHBitmapData *data,
+                                     const void *unused)
+{
+    hbitmap_test_init(data, L3 * 2, 0);
+    hbitmap_test_set(data, L1 - 1, L1 + 2);
+    hbitmap_test_set(data, L1 * 2 - 1, L1 * 2 + 2);
+    hbitmap_test_set(data, 0, L1 * 3);
+    hbitmap_test_set(data, L1 * 8 - 1, L2);
+    hbitmap_test_set(data, L2, L1);
+    hbitmap_test_set(data, L2 - L1 - 1, L1 * 8 + 2);
+    hbitmap_test_set(data, L2, L3 - L2 + 1);
+    hbitmap_test_set(data, L3 - L1, L1 * 3);
+    hbitmap_test_set(data, L3 - 1, 3);
+    hbitmap_test_set(data, L3 - 1, L2);
+}
+
+static void test_hbitmap_reset_empty(TestHBitmapData *data,
+                                     const void *unused)
+{
+    hbitmap_test_init(data, L3, 0);
+    hbitmap_test_reset(data, 0, L3);
+}
+
+static void test_hbitmap_reset(TestHBitmapData *data,
+                               const void *unused)
+{
+    hbitmap_test_init(data, L3 * 2, 0);
+    hbitmap_test_set(data, L1 - 1, L1 + 2);
+    hbitmap_test_reset(data, L1 * 2 - 1, L1 * 2 + 2);
+    hbitmap_test_set(data, 0, L1 * 3);
+    hbitmap_test_reset(data, L1 * 8 - 1, L2);
+    hbitmap_test_set(data, L2, L1);
+    hbitmap_test_reset(data, L2 - L1 - 1, L1 * 8 + 2);
+    hbitmap_test_set(data, L2, L3 - L2 + 1);
+    hbitmap_test_reset(data, L3 - L1, L1 * 3);
+    hbitmap_test_set(data, L3 - 1, 3);
+    hbitmap_test_reset(data, L3 - 1, L2);
+    hbitmap_test_set(data, 0, L3 * 2);
+    hbitmap_test_reset(data, 0, L1);
+    hbitmap_test_reset(data, 0, L2);
+    hbitmap_test_reset(data, L3, L3);
+    hbitmap_test_set(data, L3 / 2, L3);
+}
+
+static void test_hbitmap_granularity(TestHBitmapData *data,
+                                     const void *unused)
+{
+    /* Note that hbitmap_test_check has to be invoked manually in this test.  */
+    hbitmap_test_init(data, L1, 1);
+    hbitmap_test_set(data, 0, 1);
+    g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
+    hbitmap_test_check(data, 0);
+    hbitmap_test_set(data, 2, 1);
+    g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
+    hbitmap_test_check(data, 0);
+    hbitmap_test_set(data, 0, 3);
+    g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
+    hbitmap_test_reset(data, 0, 1);
+    g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
+}
+
+static void test_hbitmap_iter_granularity(TestHBitmapData *data,
+                                          const void *unused)
+{
+    HBitmapIter hbi;
+
+    /* Note that hbitmap_test_check has to be invoked manually in this test.  */
+    hbitmap_test_init(data, 131072 << 7, 7);
+    hbitmap_iter_init(&hbi, data->hb, 0);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+    hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
+    hbitmap_iter_init(&hbi, data->hb, 0);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+    hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+    hbitmap_test_set(data, (131072 << 7) - 8, 8);
+    hbitmap_iter_init(&hbi, data->hb, 0);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+    hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
+    g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+}
+
+static void hbitmap_test_add(const char *testpath,
+                                   void (*test_func)(TestHBitmapData *data, const void *user_data))
+{
+    g_test_add(testpath, TestHBitmapData, NULL, NULL, test_func,
+               hbitmap_test_teardown);
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero);
+    hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned);
+    hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty);
+    hbitmap_test_add("/hbitmap/iter/past", test_hbitmap_iter_past);
+    hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial);
+    hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity);
+    hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all);
+    hbitmap_test_add("/hbitmap/get/some", test_hbitmap_get_some);
+    hbitmap_test_add("/hbitmap/set/all", test_hbitmap_set_all);
+    hbitmap_test_add("/hbitmap/set/one", test_hbitmap_set_one);
+    hbitmap_test_add("/hbitmap/set/two-elem", test_hbitmap_set_two_elem);
+    hbitmap_test_add("/hbitmap/set/general", test_hbitmap_set);
+    hbitmap_test_add("/hbitmap/set/twice", test_hbitmap_set_twice);
+    hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap);
+    hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
+    hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
+    hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
+    g_test_run();
+
+    return 0;
+}
diff --git a/trace-events b/trace-events
index 09091e6..732cb12 100644
--- a/trace-events
+++ b/trace-events
@@ -1060,3 +1060,8 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
 xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
 xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
 xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+
+# hbitmap.c
+hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
+hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 5baeb53..495a178 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -2,7 +2,7 @@ util-obj-y = osdep.o cutils.o qemu-timer-common.o
 util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
 util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o
 util-obj-y += envlist.o path.o host-utils.o cache-utils.o module.o
-util-obj-y += bitmap.o bitops.o
+util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += acl.o
 util-obj-y += error.o qemu-error.o
 util-obj-$(CONFIG_POSIX) += compatfd.o
diff --git a/util/hbitmap.c b/util/hbitmap.c
new file mode 100644
index 0000000..fb7e01e
--- /dev/null
+++ b/util/hbitmap.c
@@ -0,0 +1,400 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* An HBitmap provides an array of bits.  The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level.  When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split into groups of bits like
+ * this (for the 64-bit case):
+ *
+ *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
+ *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
+ * similarly, and add the word index within the group.  Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * When setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once.  Hence, the total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps.  Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+    /* Number of total bits in the bottom level.  */
+    uint64_t size;
+
+    /* Number of set bits in the bottom level.  */
+    uint64_t count;
+
+    /* A scaling factor.  Given a granularity of G, each bit in the bitmap will
+     * actually represent a group of 2^G elements.  Each operation on a
+     * range of bits first rounds the bits to determine which group they land
+     * in, and then affects the whole group; iteration will only visit the first
+     * bit of each group.  Here is an example of operations in a size-16,
+     * granularity-1 HBitmap:
+     *
+     *    initial state            00000000
+     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
+     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
+     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
+     *    reset(start=5, count=5)  00000100 (iter: 10)
+     *
+     * From an implementation point of view, when setting or resetting bits,
+     * the bitmap will scale bit numbers right by this amount of bits.  When
+     * iterating, the bitmap will scale bit numbers left by this amount of
+     * bits.
+     */
+    int granularity;
+
+    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+     * coarsest).  Each bit in level N represents a word in level N+1 that
+     * has a set bit, except the last level where each bit represents the
+     * actual bitmap.
+     *
+     * Note that all bitmaps have the same number of levels.  Even a 1-bit
+     * bitmap will still allocate HBITMAP_LEVELS arrays.
+     */
+    unsigned long *levels[HBITMAP_LEVELS];
+};
+
+static inline int popcountl(unsigned long l)
+{
+    return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l);
+}
+
+/* Advance hbi to the next nonzero word and return it.  hbi->pos
+ * is updated.  Returns zero if we reach the end of the bitmap.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+    size_t pos = hbi->pos;
+    const HBitmap *hb = hbi->hb;
+    unsigned i = HBITMAP_LEVELS - 1;
+
+    unsigned long cur;
+    do {
+        cur = hbi->cur[--i];
+        pos >>= BITS_PER_LEVEL;
+    } while (cur == 0);
+
+    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
+     * bits in the level 0 bitmap; thus we can repurpose the most significant
+     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
+     * that the above loop ends even without an explicit check on i.
+     */
+
+    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+        return 0;
+    }
+    for (; i < HBITMAP_LEVELS - 1; i++) {
+        /* Shift back pos to the left, matching the right shifts above.
+         * The index of this word's least significant set bit provides
+         * the low-order bits.
+         */
+        pos = (pos << BITS_PER_LEVEL) + ffsl(cur) - 1;
+        hbi->cur[i] = cur & (cur - 1);
+
+        /* Set up next level for iteration.  */
+        cur = hb->levels[i + 1][pos];
+    }
+
+    hbi->pos = pos;
+    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+    assert(cur);
+    return cur;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+    unsigned i, bit;
+    uint64_t pos;
+
+    hbi->hb = hb;
+    pos = first >> hb->granularity;
+    hbi->pos = pos >> BITS_PER_LEVEL;
+    hbi->granularity = hb->granularity;
+
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        bit = pos & (BITS_PER_LONG - 1);
+        pos >>= BITS_PER_LEVEL;
+
+        /* Drop bits representing items before first.  */
+        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+        /* We have already added level i+1, so the lowest set bit has
+         * been processed.  Clear it.
+         */
+        if (i != HBITMAP_LEVELS - 1) {
+            hbi->cur[i] &= ~(1UL << bit);
+        }
+    }
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+    return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+    return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+    return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity.  Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+    HBitmapIter hbi;
+    uint64_t count = 0;
+    uint64_t end = last + 1;
+    unsigned long cur;
+    size_t pos;
+
+    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+    for (;;) {
+        pos = hbitmap_iter_next_word(&hbi, &cur);
+        if (pos >= (end >> BITS_PER_LEVEL)) {
+            break;
+        }
+        count += popcountl(cur);
+    }
+
+    if (pos == (end >> BITS_PER_LEVEL)) {
+        /* Drop bits representing the END-th and subsequent items.  */
+        int bit = end & (BITS_PER_LONG - 1);
+        cur &= (1UL << bit) - 1;
+        count += popcountl(cur);
+    }
+
+    return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    bool changed;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    mask = 2UL << (last & (BITS_PER_LONG - 1));
+    mask -= 1UL << (start & (BITS_PER_LONG - 1));
+    changed = (*elem == 0);
+    *elem |= mask;
+    return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            changed |= (hb->levels[level][i] == 0);
+            hb->levels[level][i] = ~0UL;
+        }
+    }
+    changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+    /* If there was any change in this layer, we may have to update
+     * the one above.
+     */
+    if (level > 0 && changed) {
+        hb_set_between(hb, level - 1, pos, lastpos);
+    }
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer.  */
+    uint64_t last = start + count - 1;
+
+    trace_hbitmap_set(hb, start, count,
+                      start >> hb->granularity, last >> hb->granularity);
+
+    start >>= hb->granularity;
+    last >>= hb->granularity;
+    count = last - start + 1;
+
+    hb->count += count - hb_count_between(hb, start, last);
+    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Clearing is the mirror image of setting: the level above only has to
+ * be updated when a word that had bits set ends up completely empty.
+ *
+ * Clear bits start..last (inclusive, both inside the same word) of *elem
+ * and report whether that emptied a previously non-zero word.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long low_bit, range, old;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    /* range has every bit from start to last set; the subtraction relies
+     * on unsigned wrap-around when last is the top bit of the word.
+     */
+    low_bit = 1UL << (start & (BITS_PER_LONG - 1));
+    range = (2UL << (last & (BITS_PER_LONG - 1))) - low_bit;
+
+    old = *elem;
+    *elem = old & ~range;
+    return old != 0 && (old & ~range) == 0;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
+ *
+ * Clear bits start..last (inclusive) of the given level.  The range that
+ * is handed to level - 1 leaves out the first and/or last word when bits
+ * remain set in them, and the recursion is skipped altogether when no
+ * word went from non-zero to zero.
+ */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+        /* Here we need a more complex test than when setting bits.  Even if
+         * something was changed, we must not blank bits in the upper level
+         * unless the lower-level word became entirely zero.  So, remove pos
+         * from the upper-level range if bits remain set.
+         */
+        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+            changed = true;
+        } else {
+            pos++;
+        }
+
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            /* Words strictly inside the range are cleared whole. */
+            changed |= (hb->levels[level][i] != 0);
+            hb->levels[level][i] = 0UL;
+        }
+    }
+
+    /* Same as above, this time for lastpos.  */
+    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+        changed = true;
+    } else {
+        lastpos--;
+    }
+
+    if (level > 0 && changed) {
+        hb_reset_between(hb, level - 1, pos, lastpos);
+    }
+}
+
+/* Mark the @count items starting at @start as clear.  @start and @count
+ * are in caller units; they are scaled down by hb->granularity before
+ * touching the bitmap.  hb->count is decreased by the number of bits
+ * that were set in the range.  An empty range (@count == 0) is a no-op.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    uint64_t last;
+
+    /* With count == 0, start + count - 1 wraps around for start == 0 and
+     * names the item before the range otherwise; either way bits outside
+     * the requested range would be cleared.
+     */
+    if (count == 0) {
+        return;
+    }
+
+    /* Compute range in the last layer.  */
+    last = start + count - 1;
+
+    trace_hbitmap_reset(hb, start, count,
+                        start >> hb->granularity, last >> hb->granularity);
+
+    start >>= hb->granularity;
+    last >>= hb->granularity;
+
+    hb->count -= hb_count_between(hb, start, last);
+    hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Report whether the bit covering @item is set in the last layer. */
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+    /* Position of the bit in the last layer, after granularity scaling. */
+    const uint64_t bitno = item >> hb->granularity;
+    const unsigned long *words = hb->levels[HBITMAP_LEVELS - 1];
+    const unsigned long word = words[bitno >> BITS_PER_LEVEL];
+
+    return ((word >> (bitno & (BITS_PER_LONG - 1))) & 1UL) != 0;
+}
+
+/* Release every level array (last level first), then the HBitmap itself. */
+void hbitmap_free(HBitmap *hb)
+{
+    unsigned level = HBITMAP_LEVELS;
+
+    while (level > 0) {
+        level--;
+        g_free(hb->levels[level]);
+    }
+    g_free(hb);
+}
+
+/* Allocate a zeroed bitmap able to track @size items, where one bit
+ * stands for 2^@granularity items.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+    HBitmap *hb = g_malloc0(sizeof (struct HBitmap));
+    unsigned level;
+
+    assert(granularity >= 0 && granularity < 64);
+    /* Number of bits needed in the last layer, rounded up. */
+    size = (size + (1ULL << granularity) - 1) >> granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+    hb->size = size;
+    hb->granularity = granularity;
+
+    /* Walk from the last layer to level 0; each layer needs one bit per
+     * word of the layer below it, and at least one word.
+     */
+    level = HBITMAP_LEVELS;
+    while (level > 0) {
+        level--;
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->levels[level] = g_malloc0(size * sizeof(unsigned long));
+    }
+
+    /* We necessarily have free bits in level 0 due to the definition
+     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
+     * hbitmap_iter_skip_words.
+     */
+    assert(size == 1);
+    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+    return hb;
+}
commit 4c37ef022381e777251d7084591978a4dc622efe
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jan 21 17:09:39 2013 +0100

    host-utils: add ffsl
    
    We can provide fast versions based on the other functions defined
    by host-utils.h.  Some care is required on glibc, which provides
    ffsl already.
    
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 81c9a75..2a32be4 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@
 #define HOST_UTILS_H 1
 
 #include "qemu/compiler.h"   /* QEMU_GNUC_PREREQ */
+#include <string.h>     /* ffsl */
 
 #if defined(__x86_64__)
 #define __HAVE_FAST_MULU64__
@@ -237,4 +238,29 @@ static inline int ctpop64(uint64_t val)
 #endif
 }
 
+/* Find-first-set for longs: returns the 1-based index of the least
+ * significant set bit of val, or 0 when val is zero.
+ *
+ * glibc does not provide an inline version of ffsl, so we always define
+ * our own; on glibc it has to go by a different name.
+ */
+#ifdef __GLIBC__
+#define ffsl qemu_ffsl
+#endif
+static inline int ffsl(long val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return val == 0 ? 0 : __builtin_ctzl(val) + 1;
+#else
+    if (val == 0) {
+        return 0;
+    }
+
+    switch (sizeof(long)) {
+    case 4:
+        return ctz32(val) + 1;
+    case 8:
+        return ctz64(val) + 1;
+    default:
+        abort();
+    }
+#endif
+}
+
 #endif
commit 49b6d7220bce42e6c06e0dbb61969a997868491f
Author: Lei Li <lilei at linux.vnet.ibm.com>
Date:   Fri Jan 25 00:03:21 2013 +0800

    QAPI: Introduce memchar-read QMP command
    
    Signed-off-by: Lei Li <lilei at linux.vnet.ibm.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/hmp-commands.hx b/hmp-commands.hx
index bcfea11..bdd48f3 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -858,6 +858,27 @@ to char device 'memory'.
 ETEXI
 
     {
+        .name       = "memchar_read",
+        .args_type  = "device:s,size:i",
+        .params     = "device size",
+        .help       = "Provide read interface for CirMemCharDriver. Read from "
+                      "it and return the data with size.",
+        .mhandler.cmd = hmp_memchar_read,
+    },
+
+STEXI
+@item memchar_read @var{device} @var{size}
+@findex memchar_read
+Provide read interface for CirMemCharDriver. Read from char device
+'memory' and return the data.
+
+@var{size} is the number of bytes to read, counted on the raw
+(unencoded) data. If it is larger than the amount of data available
+in the memchar, it is adjusted down to that amount.
+
+ETEXI
+
+    {
         .name       = "migrate",
         .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
         .params     = "[-d] [-b] [-i] uri",
diff --git a/hmp.c b/hmp.c
index cd614e8..7e259c2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -675,6 +675,27 @@ void hmp_memchar_write(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &errp);
 }
 
+/* HMP handler for memchar_read: fetch the "size" and "device" arguments,
+ * read through qmp_memchar_read() without a format, and print either the
+ * error or the data that came back.
+ */
+void hmp_memchar_read(Monitor *mon, const QDict *qdict)
+{
+    uint32_t nbytes = qdict_get_int(qdict, "size");
+    const char *devname = qdict_get_str(qdict, "device");
+    Error *err = NULL;
+    MemCharRead *result;
+
+    result = qmp_memchar_read(devname, nbytes, false, 0, &err);
+    if (err != NULL) {
+        monitor_printf(mon, "%s\n", error_get_pretty(err));
+        error_free(err);
+        return;
+    }
+
+    /* Nothing is printed when no bytes were read. */
+    if (result->count > 0) {
+        monitor_printf(mon, "%s\n", result->data);
+    }
+    qapi_free_MemCharRead(result);
+}
+
 static void hmp_cont_cb(void *opaque, int err)
 {
     if (!err) {
diff --git a/hmp.h b/hmp.h
index 06d6ea2..076d8cf 100644
--- a/hmp.h
+++ b/hmp.h
@@ -44,6 +44,7 @@ void hmp_cpu(Monitor *mon, const QDict *qdict);
 void hmp_memsave(Monitor *mon, const QDict *qdict);
 void hmp_pmemsave(Monitor *mon, const QDict *qdict);
 void hmp_memchar_write(Monitor *mon, const QDict *qdict);
+void hmp_memchar_read(Monitor *mon, const QDict *qdict);
 void hmp_cont(Monitor *mon, const QDict *qdict);
 void hmp_system_wakeup(Monitor *mon, const QDict *qdict);
 void hmp_inject_nmi(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index d6231e5..6c29f56 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -363,6 +363,42 @@
            '*format': 'DataFormat'} }
 
 ##
+# @MemCharRead
+#
+# Result of QMP command memchar-read.
+#
+# @data: The data read from memchar as string.
+#
+# @count: The number of bytes read.
+#
+# Since: 1.4
+##
+{ 'type': 'MemCharRead',
+  'data': { 'data': 'str', 'count': 'int' } }
+
+##
+# @memchar-read:
+#
+# Provide read interface for memchardev. Read from the char
+# device 'memory' and return the data.
+#
+# @device: the name of the memory char device.
+#
+# @size: the size to read in bytes.
+#
+# @format: #optional the format of the data to read from the
+#          memchardev, by default is 'utf8'.
+#
+# Returns: @MemCharRead
+#          If @device is not a valid memchar device, DeviceNotFound
+#
+# Since: 1.4
+##
+{ 'command': 'memchar-read',
+  'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
+  'returns': 'MemCharRead' }
+
+##
 # @CommandInfo:
 #
 # Information about a QMP command
diff --git a/qemu-char.c b/qemu-char.c
index dbd1a7c..1d1c631 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2790,6 +2790,53 @@ void qmp_memchar_write(const char *device, int64_t size,
     }
 }
 
+/* QMP memchar-read: read up to @size bytes from the "memory" chardev
+ * named @device.
+ *
+ * The request is clamped to the number of bytes currently buffered.  The
+ * result carries the number of raw bytes read in ->count and the data in
+ * ->data, base64-encoded when @has_format is set and @format is
+ * DATA_FORMAT_BASE64, otherwise as the raw bytes followed by a NUL.
+ *
+ * Returns a newly allocated MemCharRead, or NULL with @errp set when the
+ * device does not exist, is not a memory chardev, or @size is not
+ * positive.
+ */
+MemCharRead *qmp_memchar_read(const char *device, int64_t size,
+                              bool has_format, enum DataFormat format,
+                              Error **errp)
+{
+    CharDriverState *chr;
+    guchar *read_data;
+    MemCharRead *meminfo;
+    size_t count;
+
+    chr = qemu_chr_find(device);
+    if (!chr) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return NULL;
+    }
+
+    /* qemu_is_chr() has strcmp() semantics: non-zero means "no match". */
+    if (qemu_is_chr(chr, "memory")) {
+        error_setg(errp,"%s is not memory char device", device);
+        return NULL;
+    }
+
+    if (size <= 0) {
+        error_setg(errp, "size must be greater than zero");
+        return NULL;
+    }
+
+    meminfo = g_malloc0(sizeof(MemCharRead));
+
+    count = qemu_chr_cirmem_count(chr);
+    if (count == 0) {
+        /* Nothing buffered: ->count stays 0 from g_malloc0(). */
+        meminfo->data = g_strdup("");
+        return meminfo;
+    }
+
+    size = size > count ? count : size;
+    /* One extra zeroed byte keeps the raw data NUL-terminated. */
+    read_data = g_malloc0(size + 1);
+
+    meminfo->count = cirmem_chr_read(chr, read_data, size);
+
+    if (has_format && (format == DATA_FORMAT_BASE64)) {
+        meminfo->data = g_base64_encode(read_data, (size_t)meminfo->count);
+        /* g_base64_encode() hands back a new string, so the raw buffer
+         * is no longer referenced and must be released here.
+         */
+        g_free(read_data);
+    } else {
+        /* Ownership of the raw buffer moves to meminfo. */
+        meminfo->data = (char *)read_data;
+    }
+
+    return meminfo;
+}
+
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
 {
     char host[65], port[33], width[8], height[8];
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 96de0b8..f0f7d2b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -499,6 +499,39 @@ Example:
 EQMP
 
     {
+        .name       = "memchar-read",
+        .args_type  = "device:s,size:i,format:s?",
+        .mhandler.cmd_new = qmp_marshal_input_memchar_read,
+    },
+
+SQMP
+memchar-read
+-------------
+
+Provide read interface for CirMemCharDriver. Read from the char
+device memory and return the data with size.
+
+Arguments:
+
+- "device": the name of the char device, must be unique (json-string)
+- "size": the memory size wanted to read in bytes (refer to unencoded
+          size of the raw data), would adjust to the init size of the
+          memchar if the requested size is larger than it. (json-int)
+- "format": the format of the data read from the memchardev, default
+            is utf8. (json-string, optional)
+          - Possible values: "utf8", "base64"
+
+Example:
+
+-> { "execute": "memchar-read",
+                "arguments": { "device": "foo",
+                               "size": 1000,
+                               "format": "utf8" } }
+<- { "return": { "data": "data string...", "count": 1000 } }
+
+EQMP
+
+    {
         .name       = "xen-save-devices-state",
         .args_type  = "filename:F",
     .mhandler.cmd_new = qmp_marshal_input_xen_save_devices_state,
commit 1f590cf9455c571799d1bfc0777255fa0796d4da
Author: Lei Li <lilei at linux.vnet.ibm.com>
Date:   Fri Jan 25 00:03:20 2013 +0800

    QAPI: Introduce memchar-write QMP command
    
    Signed-off-by: Lei Li <lilei at linux.vnet.ibm.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0934b9b..bcfea11 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -837,6 +837,24 @@ STEXI
 @item nmi @var{cpu}
 @findex nmi
 Inject an NMI on the given CPU (x86 only).
+
+ETEXI
+
+    {
+        .name       = "memchar_write",
+        .args_type  = "device:s,data:s",
+        .params     = "device data",
+        .help       = "Provide writing interface for CirMemCharDriver. Write "
+                      "'data' to it.",
+        .mhandler.cmd = hmp_memchar_write,
+    },
+
+STEXI
+@item memchar_write @var{device} @var{data}
+@findex memchar_write
+Provide writing interface for CirMemCharDriver. Write @var{data}
+to char device 'memory'.
+
 ETEXI
 
     {
diff --git a/hmp.c b/hmp.c
index ae16916..cd614e8 100644
--- a/hmp.c
+++ b/hmp.c
@@ -662,6 +662,19 @@ void hmp_pmemsave(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &errp);
 }
 
+/* HMP handler for memchar_write: write the whole "data" string (its
+ * strlen() bytes, no format given) to the chardev named by "device" and
+ * report any error through hmp_handle_error().
+ */
+void hmp_memchar_write(Monitor *mon, const QDict *qdict)
+{
+    const char *devname = qdict_get_str(qdict, "device");
+    const char *payload = qdict_get_str(qdict, "data");
+    uint32_t nbytes = strlen(payload);
+    Error *err = NULL;
+
+    qmp_memchar_write(devname, nbytes, payload, false, 0, &err);
+    hmp_handle_error(mon, &err);
+}
+
 static void hmp_cont_cb(void *opaque, int err)
 {
     if (!err) {
diff --git a/hmp.h b/hmp.h
index 44be683..06d6ea2 100644
--- a/hmp.h
+++ b/hmp.h
@@ -43,6 +43,7 @@ void hmp_system_powerdown(Monitor *mon, const QDict *qdict);
 void hmp_cpu(Monitor *mon, const QDict *qdict);
 void hmp_memsave(Monitor *mon, const QDict *qdict);
 void hmp_pmemsave(Monitor *mon, const QDict *qdict);
+void hmp_memchar_write(Monitor *mon, const QDict *qdict);
 void hmp_cont(Monitor *mon, const QDict *qdict);
 void hmp_system_wakeup(Monitor *mon, const QDict *qdict);
 void hmp_inject_nmi(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index a4c6eca..d6231e5 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -325,6 +325,44 @@
 { 'command': 'query-chardev', 'returns': ['ChardevInfo'] }
 
 ##
+# @DataFormat:
+#
+# An enumeration of data format.
+#
+# @utf8: The data format is 'utf8'.
+#
+# @base64: The data format is 'base64'.
+#
+# Since: 1.4
+##
+{ 'enum': 'DataFormat',
+  'data': [ 'utf8', 'base64' ] }
+
+##
+# @memchar-write:
+#
+# Provide writing interface for memchardev. Write data to char
+# device 'memory'.
+#
+# @device: the name of the memory char device.
+#
+# @size: the size to write in bytes.
+#
+# @data: the data to write to the memchar.
+#
+# @format: #optional the format of the data to write to chardev 'memory',
+#          by default is 'utf8'.
+#
+# Returns: Nothing on success
+#          If @device is not a valid char device, DeviceNotFound
+#
+# Since: 1.4
+##
+{ 'command': 'memchar-write',
+  'data': {'device': 'str', 'size': 'int', 'data': 'str',
+           '*format': 'DataFormat'} }
+
+##
 # @CommandInfo:
 #
 # Information about a QMP command
diff --git a/qemu-char.c b/qemu-char.c
index 8045869..dbd1a7c 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2748,6 +2748,48 @@ fail:
     return NULL;
 }
 
+/* Compare chr->filename with @filename.
+ *
+ * NOTE: despite the name, this returns the strcmp() result converted to
+ * bool, i.e. true when the names DIFFER and false when they match.  The
+ * callers in this file test it in that sense ("if (qemu_is_chr(...))"
+ * leads to the "is not memory char device" error).
+ */
+static bool qemu_is_chr(const CharDriverState *chr, const char *filename)
+{
+    return strcmp(chr->filename, filename);
+}
+
+/* QMP memchar-write: write @data to the "memory" chardev named @device.
+ *
+ * When @has_format is set and @format is DATA_FORMAT_BASE64, @data is
+ * base64-decoded first and the decoded length is what gets written.
+ * Otherwise the first @size bytes of @data are written as they are;
+ * @size must then be between 0 and strlen(@data).
+ *
+ * Sets @errp when the device does not exist, is not a memory chardev,
+ * @size is out of range, or the write fails.
+ */
+void qmp_memchar_write(const char *device, int64_t size,
+                       const char *data, bool has_format,
+                       enum DataFormat format,
+                       Error **errp)
+{
+    CharDriverState *chr;
+    guchar *decoded = NULL;
+    const uint8_t *write_data;
+    gsize write_count;
+    int ret;
+
+    chr = qemu_chr_find(device);
+    if (!chr) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    /* qemu_is_chr() has strcmp() semantics: non-zero means "no match". */
+    if (qemu_is_chr(chr, "memory")) {
+        error_setg(errp,"%s is not memory char device", device);
+        return;
+    }
+
+    if (has_format && (format == DATA_FORMAT_BASE64)) {
+        /* g_base64_decode() allocates the result and stores its length. */
+        decoded = g_base64_decode(data, &write_count);
+        write_data = decoded;
+    } else {
+        /* Never read past the end of the caller's string. */
+        if (size < 0 || (uint64_t)size > strlen(data)) {
+            error_setg(errp, "size must not exceed the length of data");
+            return;
+        }
+        write_data = (const uint8_t *)data;
+        write_count = (gsize)size;
+    }
+
+    ret = cirmem_chr_write(chr, write_data, write_count);
+
+    /* Only the decoded copy is ours to free; g_free(NULL) is a no-op. */
+    g_free(decoded);
+
+    if (ret < 0) {
+        error_setg(errp, "Failed to write to device %s", device);
+        return;
+    }
+}
+
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
 {
     char host[65], port[33], width[8], height[8];
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 3be5330..96de0b8 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -466,6 +466,39 @@ Note: inject-nmi fails when the guest doesn't support injecting.
 EQMP
 
     {
+        .name       = "memchar-write",
+        .args_type  = "device:s,size:i,data:s,format:s?",
+        .mhandler.cmd_new = qmp_marshal_input_memchar_write,
+    },
+
+SQMP
+memchar-write
+-------------
+
+Provide writing interface for CirMemCharDriver. Write data to memory
+char device.
+
+Arguments:
+
+- "device": the name of the char device, must be unique (json-string)
+- "size": the number of bytes of "data" to write (json-int)
+- "data": the source data write to memory (json-string)
+- "format": the data format write to memory, default is
+            utf8. (json-string, optional)
+          - Possible values: "utf8", "base64"
+
+Example:
+
+-> { "execute": "memchar-write",
+                "arguments": { "device": "foo",
+                               "size": 8,
+                               "data": "abcdefgh",
+                               "format": "utf8" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "xen-save-devices-state",
         .args_type  = "filename:F",
     .mhandler.cmd_new = qmp_marshal_input_xen_save_devices_state,
commit 51767e7cf2c3abc07d30009ab3d6262bdfd89b8b
Author: Lei Li <lilei at linux.vnet.ibm.com>
Date:   Fri Jan 25 00:03:19 2013 +0800

    qemu-char: Add new char backend CirMemCharDriver
    
    Signed-off-by: Lei Li <lilei at linux.vnet.ibm.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/qemu-char.c b/qemu-char.c
index 9ba0573..8045869 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -98,6 +98,7 @@
 #include "ui/qemu-spice.h"
 
 #define READ_BUF_LEN 4096
+#define CBUFF_SIZE 65536
 
 /***********************************************************/
 /* character device */
@@ -2643,6 +2644,110 @@ size_t qemu_chr_mem_osize(const CharDriverState *chr)
     return d->outbuf_size;
 }
 
+/*********************************************************/
+/*CircularMemory chardev*/
+
+/* State of the circular-buffer ("memory") chardev. */
+typedef struct {
+    size_t size;    /* capacity of cbuf in bytes */
+    size_t prod;    /* running count of bytes written; slot is prod % size */
+    size_t cons;    /* running count of bytes read; slot is cons % size */
+    uint8_t *cbuf;  /* the buffer itself, size bytes long */
+} CirMemCharDriver;
+
+/* True when the consumer has caught up with the producer. */
+static bool cirmem_chr_is_empty(const CharDriverState *chr)
+{
+    const CirMemCharDriver *ring = chr->opaque;
+    bool drained = (ring->prod == ring->cons);
+
+    return drained;
+}
+
+/* Number of bytes written to the buffer that have not been read yet. */
+static size_t qemu_chr_cirmem_count(const CharDriverState *chr)
+{
+    const CirMemCharDriver *ring = chr->opaque;
+    size_t pending = ring->prod - ring->cons;
+
+    return pending;
+}
+
+/* chr_write handler of the circular-buffer chardev.
+ *
+ * Appends the @len bytes at @buf to the ring, silently dropping IAC
+ * bytes.  When the ring is full the oldest data is overwritten and the
+ * consumer position is moved forward accordingly.
+ *
+ * Returns @len (every input byte has been consumed, whether stored or
+ * dropped), or -1 when @buf is NULL or @len is negative.  Returning 0
+ * here would tell a chr_write caller that nothing was accepted.
+ */
+static int cirmem_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
+{
+    CirMemCharDriver *d = chr->opaque;
+    int i;
+
+    if (!buf || (len < 0)) {
+        return -1;
+    }
+
+    for (i = 0; i < len; i++ ) {
+        /* Avoid writing the IAC information to the queue. */
+        if ((unsigned char)buf[i] == IAC) {
+            continue;
+        }
+
+        d->cbuf[d->prod++ % d->size] = buf[i];
+        /* Overrun: forget the oldest byte so at most size are pending. */
+        if ((d->prod - d->cons) > d->size) {
+            d->cons = d->prod - d->size;
+        }
+    }
+
+    return len;
+}
+
+/* Copy up to @len pending bytes from the ring into @buf, oldest first,
+ * advancing the consumer position.  Returns the number of bytes copied.
+ */
+static int cirmem_chr_read(CharDriverState *chr, uint8_t *buf, int len)
+{
+    CirMemCharDriver *ring = chr->opaque;
+    int copied = 0;
+
+    while (copied < len && !cirmem_chr_is_empty(chr)) {
+        buf[copied] = ring->cbuf[ring->cons % ring->size];
+        ring->cons++;
+        copied++;
+    }
+
+    return copied;
+}
+
+/* chr_close handler: detach the driver state from @chr and free it
+ * together with its buffer.
+ */
+static void cirmem_chr_close(struct CharDriverState *chr)
+{
+    CirMemCharDriver *ring = chr->opaque;
+
+    chr->opaque = NULL;
+    g_free(ring->cbuf);
+    g_free(ring);
+}
+
+/* Open handler of the "memory" backend: create a circular-buffer chardev
+ * whose capacity comes from the "maxcapacity" option (CBUFF_SIZE when the
+ * option is absent or zero).  Returns NULL, after printing a message,
+ * when the capacity is not a power of 2.
+ */
+static CharDriverState *qemu_chr_open_cirmemchr(QemuOpts *opts)
+{
+    CharDriverState *chr = g_malloc0(sizeof(CharDriverState));
+    CirMemCharDriver *ring = g_malloc(sizeof(*ring));
+
+    ring->size = qemu_opt_get_number(opts, "maxcapacity", 0);
+    if (ring->size == 0) {
+        ring->size = CBUFF_SIZE;
+    }
+
+    /* The size must be power of 2 */
+    if ((ring->size & (ring->size - 1)) != 0) {
+        fprintf(stderr, "chardev: size of memory device must be power of 2\n");
+        g_free(ring);
+        g_free(chr);
+        return NULL;
+    }
+
+    /* Start with an empty, zero-filled ring. */
+    ring->cbuf = g_malloc0(ring->size);
+    ring->cons = 0;
+    ring->prod = 0;
+
+    chr->chr_close = cirmem_chr_close;
+    chr->chr_write = cirmem_chr_write;
+    chr->opaque = ring;
+
+    return chr;
+}
+
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
 {
     char host[65], port[33], width[8], height[8];
@@ -2697,6 +2802,11 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
         qemu_opt_set(opts, "path", filename);
         return opts;
     }
+    if (strstart(filename, "memory", &p)) {
+        qemu_opt_set(opts, "backend", "memory");
+        qemu_opt_set(opts, "maxcapacity", p);
+        return opts;
+    }
     if (strstart(filename, "file:", &p)) {
         qemu_opt_set(opts, "backend", "file");
         qemu_opt_set(opts, "path", p);
@@ -2796,6 +2906,7 @@ static const struct {
     { .name = "udp",       .open = qemu_chr_open_udp },
     { .name = "msmouse",   .open = qemu_chr_open_msmouse },
     { .name = "vc",        .open = text_console_init },
+    { .name = "memory",    .open = qemu_chr_open_cirmemchr },
 #ifdef _WIN32
     { .name = "file",      .open = qemu_chr_open_win_file_out },
     { .name = "pipe",      .open = qemu_chr_open_win_pipe },
@@ -3055,6 +3166,9 @@ QemuOptsList qemu_chardev_opts = {
         },{
             .name = "debug",
             .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "maxcapacity",
+            .type = QEMU_OPT_NUMBER,
         },
         { /* end of list */ }
     },
diff --git a/qemu-options.hx b/qemu-options.hx
index 4e2b499..2d44137 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1736,6 +1736,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
     "-chardev msmouse,id=id[,mux=on|off]\n"
     "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n"
     "         [,mux=on|off]\n"
+    "-chardev memory,id=id,maxcapacity=maxcapacity\n"
     "-chardev file,id=id,path=path[,mux=on|off]\n"
     "-chardev pipe,id=id,path=path[,mux=on|off]\n"
 #ifdef _WIN32
@@ -1777,6 +1778,7 @@ Backend is one of:
 @option{udp},
 @option{msmouse},
 @option{vc},
+@option{memory},
 @option{file},
 @option{pipe},
 @option{console},
@@ -1885,6 +1887,14 @@ the console, in pixels.
 @option{cols} and @option{rows} specify that the console be sized to fit a text
 console with the given dimensions.
 
+@item -chardev memory ,id=@var{id} ,maxcapacity=@var{maxcapacity}
+
+Create a circular buffer of fixed size. The size is given by the optional
+@option{maxcapacity} and defaults to 64K if it is not given.
+
+@option{maxcapacity} specifies the maximum capacity of the circular buffer
+to create. It should be a power of 2.
+
 @item -chardev file ,id=@var{id} ,path=@var{path}
 
 Log all traffic received from the guest to a file.
commit 045a70857ac07de791ddbf313323c4e4f91e056e
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Tue Dec 4 12:04:39 2012 -0200

    docs: document virtio-balloon stats
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>

diff --git a/docs/virtio-balloon-stats.txt b/docs/virtio-balloon-stats.txt
new file mode 100644
index 0000000..f74612f
--- /dev/null
+++ b/docs/virtio-balloon-stats.txt
@@ -0,0 +1,104 @@
+virtio balloon memory statistics
+================================
+
+The virtio balloon driver supports guest memory statistics reporting. These
+statistics are available to QEMU users as QOM (QEMU Object Model) device
+properties via a polling mechanism.
+
+Before querying the available stats, clients first have to enable polling.
+This is done by writing a time interval value (in seconds) to the
+guest-stats-polling-interval property. This value can be:
+
+  > 0  enables polling in the specified interval. If polling is already
+       enabled, the polling time interval is changed to the new value
+
+  0    disables polling. Previous polled statistics are still valid and
+       can be queried.
+
+Once polling is enabled, the virtio-balloon device in QEMU will start
+polling the guest's balloon driver for new stats in the specified time
+interval.
+
+To retrieve those stats, clients have to query the guest-stats property,
+which will return a dictionary containing:
+
+  o A key named 'stats', containing all available stats. If the guest
+    doesn't support a particular stat, or if it couldn't be retrieved,
+    its value will be -1. Currently, the following stats are supported:
+
+      - stat-swap-in
+      - stat-swap-out
+      - stat-major-faults
+      - stat-minor-faults
+      - stat-free-memory
+      - stat-total-memory
+
+  o A key named last-update, which contains the last stats update
+    timestamp in seconds. Since this timestamp is generated by the host,
+    a buggy guest can't influence its value
+
+It's also important to note the following:
+
+ - Previously polled statistics remain available even if the polling is
+   later disabled
+
+ - As noted above, if a guest doesn't support a particular stat its value
+   will always be -1. However, it's also possible that a guest temporarily
+   couldn't update one or even all stats. If this happens, just wait for
+   the next update
+
+ - Polling can be enabled even if the guest doesn't have stats support
+   or the balloon driver wasn't loaded in the guest. If this is the case
+   and stats are queried, an error will be returned
+
+ - The polling timer is only re-armed when the guest responds to the
+   statistics request. This means that if a (buggy) guest doesn't ever
+   respond to the request the timer will never be re-armed, which has
+   the same effect as disabling polling
+
+Here are a few examples. QEMU is started with '-balloon virtio', which
+generates '/machine/peripheral-anon/device[1]' as the QOM path for the
+balloon device.
+
+Enable polling with 2 seconds interval:
+
+{ "execute": "qom-set",
+             "arguments": { "path": "/machine/peripheral-anon/device[1]",
+			 "property": "guest-stats-polling-interval", "value": 2 } }
+
+{ "return": {} }
+
+Change polling to 10 seconds:
+
+{ "execute": "qom-set",
+             "arguments": { "path": "/machine/peripheral-anon/device[1]",
+			 "property": "guest-stats-polling-interval", "value": 10 } }
+
+{ "return": {} }
+
+Get stats:
+
+{ "execute": "qom-get",
+  "arguments": { "path": "/machine/peripheral-anon/device[1]",
+  "property": "guest-stats" } }
+{
+    "return": {
+        "stats": {
+            "stat-swap-out": 0,
+            "stat-free-memory": 844943360,
+            "stat-minor-faults": 219028,
+            "stat-major-faults": 235,
+            "stat-total-memory": 1044406272,
+            "stat-swap-in": 0
+        },
+        "last-update": 1358529861
+    }
+}
+
+Disable polling:
+
+{ "execute": "qom-set",
+             "arguments": { "path": "/machine/peripheral-anon/device[1]",
+			 "property": "guest-stats-polling-interval", "value": 0 } }
+
+{ "return": {} }
commit 7e6ccd9c159450e79f42d08112ebe263b0466644
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Sat Dec 1 00:14:57 2012 -0200

    balloon: re-enable balloon stats
    
    The statistics are now available through device properties via a
    polling mechanism. First a client has to enable polling, then it
    can query available stats.
    
    Polling is enabled by setting an update interval (in seconds)
    to a property named guest-stats-polling-interval, like this:
    
    { "execute": "qom-set",
      "arguments": { "path": "/machine/peripheral-anon/device[1]",
                     "property": "guest-stats-polling-interval", "value": 4 } }
    
    Then the available stats can be retrieved by querying the
    guest-stats property. The returned object is a dict containing
    all available stats. Example:
    
    { "execute": "qom-get",
      "arguments": { "path": "/machine/peripheral-anon/device[1]",
      "property": "guest-stats" } }
    
    {
        "return": {
            "stats": {
                "stat-swap-out": 0,
                "stat-free-memory": 844943360,
                "stat-minor-faults": 219028,
                "stat-major-faults": 235,
                "stat-total-memory": 1044406272,
                "stat-swap-in": 0
            },
            "last-update": 1358529861
        }
    }
    
    Please, check the next commit for full documentation.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>

diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 2520cba..c0a7902 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -14,6 +14,7 @@
  */
 
 #include "qemu/iov.h"
+#include "qemu/timer.h"
 #include "qemu-common.h"
 #include "virtio.h"
 #include "pc.h"
@@ -22,6 +23,7 @@
 #include "virtio-balloon.h"
 #include "sysemu/kvm.h"
 #include "exec/address-spaces.h"
+#include "qapi/visitor.h"
 
 #if defined(__linux__)
 #include <sys/mman.h>
@@ -36,6 +38,9 @@ typedef struct VirtIOBalloon
     uint64_t stats[VIRTIO_BALLOON_S_NR];
     VirtQueueElement stats_vq_elem;
     size_t stats_vq_offset;
+    QEMUTimer *stats_timer;
+    int64_t stats_last_update;
+    int64_t stats_poll_interval;
     DeviceState *qdev;
 } VirtIOBalloon;
 
@@ -53,6 +58,16 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+/* Name of each balloon statistic, indexed by its VIRTIO_BALLOON_S_* tag.
+ * balloon_stats_get_all() uses these as the field names of the "stats"
+ * struct.  The slot at VIRTIO_BALLOON_S_NR is explicitly NULL.
+ */
+static const char *balloon_stat_names[] = {
+   [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
+   [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
+   [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
+   [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
+   [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
+   [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
+   [VIRTIO_BALLOON_S_NR] = NULL
+};
+
 /*
  * reset_stats - Mark all items in the stats array as unset
  *
@@ -67,6 +82,118 @@ static inline void reset_stats(VirtIOBalloon *dev)
     for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
 }
 
+/* True when the guest has acked the VIRTIO_BALLOON_F_STATS_VQ feature. */
+static bool balloon_stats_supported(const VirtIOBalloon *s)
+{
+    const bool has_stats_vq =
+        (s->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) != 0;
+
+    return has_stats_vq;
+}
+
+/* Polling counts as enabled while a positive interval is configured. */
+static bool balloon_stats_enabled(const VirtIOBalloon *s)
+{
+    return !(s->stats_poll_interval <= 0);
+}
+
+/* Stop and free the polling timer and mark polling as disabled.  Does
+ * nothing when polling is not enabled.
+ */
+static void balloon_stats_destroy_timer(VirtIOBalloon *s)
+{
+    if (!balloon_stats_enabled(s)) {
+        return;
+    }
+
+    qemu_del_timer(s->stats_timer);
+    qemu_free_timer(s->stats_timer);
+    s->stats_timer = NULL;
+    s->stats_poll_interval = 0;
+}
+
+/* (Re)arm the stats timer to expire @secs seconds from now on vm_clock.
+ *
+ * @secs is 64 bits wide so that the int64_t polling interval is not
+ * truncated on the way in and the conversion to milliseconds is carried
+ * out in 64-bit arithmetic rather than in int.
+ */
+static void balloon_stats_change_timer(VirtIOBalloon *s, int64_t secs)
+{
+    qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000);
+}
+
+/* Timer callback: hand the stats buffer back to the guest to request a
+ * fresh set of statistics.  If the guest does not support the stats
+ * queue, just re-arm the timer for the next interval instead.
+ */
+static void balloon_stats_poll_cb(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+
+    if (balloon_stats_supported(dev)) {
+        virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
+        virtio_notify(&dev->vdev, dev->svq);
+    } else {
+        /* re-schedule */
+        balloon_stats_change_timer(dev, dev->stats_poll_interval);
+    }
+}
+
+/* Getter of the "guest-stats" property.
+ *
+ * Emits a struct holding "last-update" (s->stats_last_update) and a nested
+ * "stats" struct with one int64 field per entry of s->stats[], named after
+ * balloon_stat_names[].  Sets @errp and emits nothing when
+ * s->stats_last_update is still zero.
+ */
+static void balloon_stats_get_all(Object *obj, struct Visitor *v,
+                                  void *opaque, const char *name, Error **errp)
+{
+    VirtIOBalloon *s = opaque;
+    int i;
+
+    if (!s->stats_last_update) {
+        error_setg(errp, "guest hasn't updated any stats yet");
+        return;
+    }
+
+    /* Outer struct. */
+    visit_start_struct(v, NULL, "guest-stats", name, 0, errp);
+    visit_type_int(v, &s->stats_last_update, "last-update", errp);
+
+    /* Nested "stats" struct; stats[] is uint64_t but visited as int64. */
+    visit_start_struct(v, NULL, NULL, "stats", 0, errp);
+    for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
+        visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i],
+                         errp);
+    }
+    visit_end_struct(v, errp);
+
+    visit_end_struct(v, errp);
+}
+
+/* Getter of "guest-stats-polling-interval": visit the current interval. */
+static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v,
+                                            void *opaque, const char *name,
+                                            Error **errp)
+{
+    VirtIOBalloon *dev = opaque;
+    int64_t *interval = &dev->stats_poll_interval;
+
+    visit_type_int(v, interval, name, errp);
+}
+
+/* Setter of "guest-stats-polling-interval" (value in seconds).
+ *
+ *   - a negative value or one that is too large is rejected via @errp;
+ *   - the current value is accepted and changes nothing;
+ *   - 0 destroys the timer, which disables polling;
+ *   - any other value either re-arms the existing timer with the new
+ *     interval or creates the timer and fires it immediately.
+ */
+static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v,
+                                            void *opaque, const char *name,
+                                            Error **errp)
+{
+    VirtIOBalloon *s = opaque;
+    int64_t value;
+
+    visit_type_int(v, &value, name, errp);
+    if (error_is_set(errp)) {
+        return;
+    }
+
+    if (value < 0) {
+        error_setg(errp, "timer value must be greater than zero");
+        return;
+    }
+
+    /* The interval is converted to milliseconds when the timer is armed;
+     * keep that product within the range of a 32-bit int so that the
+     * conversion cannot overflow.
+     */
+    if (value > INT32_MAX / 1000) {
+        error_setg(errp, "timer value is too big");
+        return;
+    }
+
+    if (value == s->stats_poll_interval) {
+        return;
+    }
+
+    if (value == 0) {
+        /* timer=0 disables the timer */
+        balloon_stats_destroy_timer(s);
+        return;
+    }
+
+    if (balloon_stats_enabled(s)) {
+        /* timer interval change */
+        s->stats_poll_interval = value;
+        balloon_stats_change_timer(s, value);
+        return;
+    }
+
+    /* create a new timer */
+    g_assert(s->stats_timer == NULL);
+    s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s);
+    s->stats_poll_interval = value;
+    balloon_stats_change_timer(s, 0);
+}
+
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBalloon *s = to_virtio_balloon(vdev);
@@ -107,9 +234,10 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
     VirtQueueElement *elem = &s->stats_vq_elem;
     VirtIOBalloonStat stat;
     size_t offset = 0;
+    qemu_timeval tv;
 
     if (!virtqueue_pop(vq, elem)) {
-        return;
+        goto out;
     }
 
     /* Initialize the stats to get rid of any stale values.  This is only
@@ -128,6 +256,18 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
             s->stats[tag] = val;
     }
     s->stats_vq_offset = offset;
+
+    if (qemu_gettimeofday(&tv) < 0) {
+        fprintf(stderr, "warning: %s: failed to get time of day\n", __func__);
+        goto out;
+    }
+
+    s->stats_last_update = tv.tv_sec;
+
+out:
+    if (balloon_stats_enabled(s)) {
+        balloon_stats_change_timer(s, s->stats_poll_interval);
+    }
 }
 
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
@@ -237,6 +377,14 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
     register_savevm(dev, "virtio-balloon", -1, 1,
                     virtio_balloon_save, virtio_balloon_load, s);
 
+    object_property_add(OBJECT(dev), "guest-stats", "guest statistics",
+                        balloon_stats_get_all, NULL, NULL, s, NULL);
+
+    object_property_add(OBJECT(dev), "guest-stats-polling-interval", "int",
+                        balloon_stats_get_poll_interval,
+                        balloon_stats_set_poll_interval,
+                        NULL, s, NULL);
+
     return &s->vdev;
 }
 
@@ -244,6 +392,7 @@ void virtio_balloon_exit(VirtIODevice *vdev)
 {
     VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
 
+    balloon_stats_destroy_timer(s);
     qemu_remove_balloon_handler(s);
     unregister_savevm(s->qdev, "virtio-balloon", s);
     virtio_cleanup(vdev);
commit 01ceb97e7b8b00a1d4779543fe4b958df7e16890
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Mon Dec 3 15:56:41 2012 -0200

    balloon: drop old stats code & API
    
    Next commit will re-enable balloon stats with a different interface, but
    this old code conflicts with it. Let's drop it.
    
    It's important to note that the QMP and HMP interfaces are also dropped
    by this commit. That shouldn't be a problem though, because:
    
     1. All QMP fields are optional
     2. This feature has always been disabled
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>

diff --git a/hmp.c b/hmp.c
index c7b6ba0..ae16916 100644
--- a/hmp.c
+++ b/hmp.c
@@ -465,29 +465,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
         return;
     }
 
-    monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
-    if (info->has_mem_swapped_in) {
-        monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
-    }
-    if (info->has_mem_swapped_out) {
-        monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
-    }
-    if (info->has_major_page_faults) {
-        monitor_printf(mon, " major_page_faults=%" PRId64,
-                       info->major_page_faults);
-    }
-    if (info->has_minor_page_faults) {
-        monitor_printf(mon, " minor_page_faults=%" PRId64,
-                       info->minor_page_faults);
-    }
-    if (info->has_free_mem) {
-        monitor_printf(mon, " free_mem=%" PRId64, info->free_mem);
-    }
-    if (info->has_total_mem) {
-        monitor_printf(mon, " total_mem=%" PRId64, info->total_mem);
-    }
-
-    monitor_printf(mon, "\n");
+    monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
 
     qapi_free_BalloonInfo(info);
 }
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 3040bc6..2520cba 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -164,28 +164,6 @@ static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
 static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
 {
     VirtIOBalloon *dev = opaque;
-
-#if 0
-    /* Disable guest-provided stats for now. For more details please check:
-     * https://bugzilla.redhat.com/show_bug.cgi?id=623903
-     *
-     * If you do enable it (which is probably not going to happen as we
-     * need a new command for it), remember that you also need to fill the
-     * appropriate members of the BalloonInfo structure so that the stats
-     * are returned to the client.
-     */
-    if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
-        virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
-        virtio_notify(&dev->vdev, dev->svq);
-        return;
-    }
-#endif
-
-    /* Stats are not supported.  Clear out any stale values that might
-     * have been set by a more featureful guest kernel.
-     */
-    reset_stats(dev);
-
     info->actual = ram_size - ((uint64_t) dev->actual <<
                                VIRTIO_BALLOON_PFN_SHIFT);
 }
@@ -255,8 +233,6 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
     s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
     s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
 
-    reset_stats(s);
-
     s->qdev = dev;
     register_savevm(dev, "virtio-balloon", -1, 1,
                     virtio_balloon_save, virtio_balloon_load, s);
diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..a4c6eca 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -977,28 +977,10 @@
 #
 # @actual: the number of bytes the balloon currently contains
 #
-# @mem_swapped_in: #optional number of pages swapped in within the guest
-#
-# @mem_swapped_out: #optional number of pages swapped out within the guest
-#
-# @major_page_faults: #optional number of major page faults within the guest
-#
-# @minor_page_faults: #optional number of minor page faults within the guest
-#
-# @free_mem: #optional amount of memory (in bytes) free in the guest
-#
-# @total_mem: #optional amount of memory (in bytes) visible to the guest
-#
 # Since: 0.14.0
 #
-# Notes: all current versions of QEMU do not fill out optional information in
-#        this structure.
 ##
-{ 'type': 'BalloonInfo',
-  'data': {'actual': 'int', '*mem_swapped_in': 'int',
-           '*mem_swapped_out': 'int', '*major_page_faults': 'int',
-           '*minor_page_faults': 'int', '*free_mem': 'int',
-           '*total_mem': 'int'} }
+{ 'type': 'BalloonInfo', 'data': {'actual': 'int' } }
 
 ##
 # @query-balloon:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cbf1280..3be5330 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2549,13 +2549,6 @@ Make an asynchronous request for balloon info. When the request completes a
 json-object will be returned containing the following data:
 
 - "actual": current balloon value in bytes (json-int)
-- "mem_swapped_in": Amount of memory swapped in bytes (json-int, optional)
-- "mem_swapped_out": Amount of memory swapped out in bytes (json-int, optional)
-- "major_page_faults": Number of major faults (json-int, optional)
-- "minor_page_faults": Number of minor faults (json-int, optional)
-- "free_mem": Total amount of free and unused memory in
-              bytes (json-int, optional)
-- "total_mem": Total amount of available memory in bytes (json-int, optional)
 
 Example:
 
@@ -2563,12 +2556,6 @@ Example:
 <- {
       "return":{
-         "actual":1073741824,
-         "mem_swapped_in":0,
-         "mem_swapped_out":0,
-         "major_page_faults":142,
-         "minor_page_faults":239245,
-         "free_mem":1014185984,
-         "total_mem":1044668416
+         "actual":1073741824
       }
    }
 
commit 58513bde833804bc9395d79fd81aae631b97c348
Author: Jeff Cody <jcody at redhat.com>
Date:   Fri Jan 18 12:45:35 2013 -0500

    block: Monitor command commit neglects to report some errors
    
    The non-live bdrv_commit() function may return one of the following
    errors: -ENOTSUP, -EBUSY, -EACCES, -EIO.  The only error that is
    checked in the HMP handler is -EBUSY, so the monitor command 'commit'
    silently fails for all error cases other than 'Device is in use'.
    
    Report error using monitor_printf() and strerror(), and convert existing
    qerror_report() calls in do_commit() to monitor_printf().
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 9126587..030070b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -642,21 +642,17 @@ void do_commit(Monitor *mon, const QDict *qdict)
 
     if (!strcmp(device, "all")) {
         ret = bdrv_commit_all();
-        if (ret == -EBUSY) {
-            qerror_report(QERR_DEVICE_IN_USE, device);
-            return;
-        }
     } else {
         bs = bdrv_find(device);
         if (!bs) {
-            qerror_report(QERR_DEVICE_NOT_FOUND, device);
+            monitor_printf(mon, "Device '%s' not found\n", device);
             return;
         }
         ret = bdrv_commit(bs);
-        if (ret == -EBUSY) {
-            qerror_report(QERR_DEVICE_IN_USE, device);
-            return;
-        }
+    }
+    if (ret < 0) {
+        monitor_printf(mon, "'commit' error for '%s': %s\n", device,
+                       strerror(-ret));
     }
 }
 
commit f9e90c798dc84be2b0b988517e86b49faf79d5dc
Author: Marcelo Tosatti <mtosatti at redhat.com>
Date:   Thu Jan 10 23:02:48 2013 -0200

    vmxcap: bit 9 of VMX_PROCBASED_CTLS2 is 'virtual interrupt delivery'
    
    Bit 9 of MSR_IA32_VMX_PROCBASED_CTLS2 is
    virtual interrupt delivery.
    
    Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>

diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index cbe6440..0b23f77 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -147,6 +147,7 @@ controls = [
             5: 'Enable VPID',
             6: 'WBINVD exiting',
             7: 'Unrestricted guest',
+            9: 'Virtual interrupt delivery',
             10: 'PAUSE-loop exiting',
             11: 'RDRAND exiting',
             12: 'Enable INVPCID',