Red Hat Bugzilla – Bug 1086172
Migrating Windows guest with virtio-scsi from RHEL-6.5 to 7.0 host broken for machine rhel6.3.0 or older
Last modified: 2014-12-03 17:34:18 EST
Description of problem: Boot Win8-64 guest on RHEL6.5 host and set "param_change=off" of virtio-scsi-pci. Migrate guest from RHEL6.5 host to RHEL7.0 host, the dest qemu-kvm quits and migration failed. Version-Release number of selected component (if applicable): RHEL6.5 host: kernel-2.6.32-456.el6.x86_64 qemu-kvm-0.12.1.2-2.423.el6.x86_64 RHEL7.0 host: kernel-3.10.0-121.el7.x86_64 qemu-kvm-1.5.3-60.el7.x86_64 How reproducible: 100% Steps to Reproduce: 1.full qemu-kvm command line on rhel6.5 & rhel7.0 host /usr/libexec/qemu-kvm -M rhel6.1.0 -cpu Westmere,hv_relaxed -enable-kvm -m 2048 -realtime mlock=off -smp 4,sockets=2,cores=2,threads=1,maxcpus=160 -numa node,cpus=0 -numa node,cpus=1 -numa node,cpus=2 -numa node,cpus=3 -nodefconfig -nodefaults -global PIIX4_PM.disable_s3=0 -global PIIX4_PM.disable_s4=0 -global ide-drive.physical_block_size=4096 -global ide-drive.logical_block_size=4096 -global virtio-blk-pci.physical_block_size=512 -global virtio-blk-pci.logical_block_size=512 -boot order=cdn,once=n,menu=on,strict=on,reboot-timeout=60000 -k en-us -soundhw ac97 -device virtio-balloon-pci,id=ballooning,bus=pci.0,addr=0x5,indirect_desc=on,event_idx=on,multifunction=on,rombar=100 -monitor stdio -name test-all-qemu-kvm-option -uuid `uuidgen` -drive file=/mnt/virtio-blk-disk,if=none,id=drive-virtio-disk,format=raw,cache=none,aio=native,werror=stop,rerror=stop,media=disk,snapshot=off,bus=1,unit=1 -device virtio-blk-pci,scsi=off,drive=drive-virtio-disk,id=virtio-disk,bus=pci.0,addr=0x7,physical_block_size=512,logical_block_size=512,multifunction=on,scsi=on,event_idx=on,indirect_desc=on,vectors=32,x-data-plane=off,ioeventfd=on,serial=fuxc,discard_granularity=1,min_io_size=4096,opt_io_size=4096 -usbdevice tablet -usbdevice mouse -netdev tap,id=hostnet0,vhost=on,script=/etc/ovs-ifup,downscript=/etc/ovs-ifdown,ifname=fuxc-net -device 
virtio-net-pci,netdev=hostnet0,id=virtio-net-pci0,mac=00:01:02:03:04:05,bus=pci.0,addr=0x9,multifunction=on,status=on,gso=on,ioeventfd=on,vectors=8,indirect_desc=off,event_idx=off,guest_tso4=off,guest_tso6=on,guest_ecn=off,guest_ufo=on,host_tso4=off,host_tso6=on,host_ecn=on,mrg_rxbuf=off,ctrl_vq=on,host_ufo=on,mrg_rxbuf=on,ctrl_rx=on,ctrl_vlan=on,ctrl_rx_extra=on,ctrl_mac_addr=on -netdev tap,id=hostnet1,vhost=off,script=/etc/ovs-ifup,downscript=/etc/ovs-ifdown,ifname=fuxc-net1 -device e1000,netdev=hostnet1,id=virtio-net-pci1,mac=00:01:02:03:04:06,bus=pci.0,addr=0xa,multifunction=off -netdev tap,id=hostnet2,vhost=off,script=/etc/ovs-ifup,downscript=/etc/ovs-ifdown,ifname=fuxc-net-rtl8139 -device rtl8139,netdev=hostnet2,id=virtio-net-pci2,mac=00:01:02:03:04:07,bus=pci.0,addr=0xb,multifunction=off -serial unix:/tmp/monitor2,server,nowait -rtc base=utc -no-shutdown -drive file=/mnt/ide-disk,if=none,id=drive-data-disk,format=raw,cache=none,aio=native,werror=stop,rerror=stop,copy-on-read=off,serial=fux-ide,media=disk -device ide-drive,drive=drive-data-disk,id=system-disk,wwn=0x5000c50015ea71ad,logical_block_size=512,physical_block_size=512,min_io_size=32,opt_io_size=64,discard_granularity=512,ver=fuxc-ver,bus=ide.0,unit=0 -chardev tty,id=serial1,path=/dev/ttyS0 -device isa-serial,chardev=serial1 -chardev socket,id=channel1,path=/tmp/helloworld1,server,nowait -chardev socket,id=channel2,path=/tmp/helloworld2,server,nowait -device virtio-serial-pci,id=virtio-serial0,max_ports=16,vectors=0 -chardev file,id=channel3,path=/mnt/helloworld1.txt -device virtserialport,chardev=channel3,name=com.redhat.rhevm.vdsm1,bus=virtio-serial0.0,id=port1,nr=1 -chardev socket,id=isa-serial-1,path=/tmp/isa-serial-1,server,nowait -device isa-serial,chardev=isa-serial-1 -device ich9-usb-uhci1,id=usb1,addr=0x11 -device usb-tablet,id=input0 -device usb-mouse,id=input1 -global pvpanic.ioport=0x0505 -machine rhel6.1.0,dump-guest-core=off -drive 
file=/mnt/driver.iso,if=none,media=cdrom,id=drive-ide0-1-0,readonly=on,format=raw -device ide-drive,bus=ide.1,unit=1,drive=drive-ide0-1-0,id=ide0-1-0,wwn=0x5000c50015ea71ade,logical_block_size=512,physical_block_size=512,min_io_size=32,opt_io_size=64,discard_granularity=512,unit=1,ver=fuxc-ver-cdrom,bus=ide.0,unit=1 -drive file=/mnt/win8-64.qcow2,if=none,id=drive-system-disk,format=qcow2,cache=none,aio=native,werror=stop,rerror=stop -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x13,indirect_desc=on,event_idx=off,hotplug=on,param_change=off,num_queues=1,max_sectors=512,cmd_per_lun=16,multifunction=on,rombar=64 -device scsi-hd,drive=drive-system-disk,bus=scsi0.0,scsi-id=0,lun=0,id=data-disk2,logical_block_size=512,physical_block_size=512,min_io_size=512,opt_io_size=512,discard_granularity=512,ver=fuxc-scsi,serial=fuxc-scsi-serial,removable=off,wwn=0x16,channel=0,scsi-id=2,lun=0,bootindex=0 -device sga -spice port=5901,password=redhat-vga,disable-ticketing -vga qxl -global qxl-vga.vram_size=33554432 -device intel-hda,id=sound0,bus=pci.0 -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev socket,path=/tmp/foo,server,nowait,id=foo -drive file=/usr/share/virtio-win/virtio-win_amd64.vfd,if=none,id=drive-fdc0-0-0,readonly=on,format=raw -global isa-fdc.driveA=drive-fdc0-0-0 -vnc :10 2.do migration 3. Actual results: 1. the dest qemu-kvm quits with erro info: (qemu) qemu-kvm: Features 0x6 unsupported. Allowed features: 0x51000002 qemu: warning: error while loading state for instance 0x0 of device '0000:00:13.0/virtio-scsi' load of migration failed KVM: injection failed, MSI lost (Operation not permitted) 2. on src host, check vm/migrate status: (qemu) info status VM status: paused (post-migrate) (qemu) info migrate Migration status: completed total time: 10699 milliseconds downtime: 4695 milliseconds transferred ram: 697192 kbytes remaining ram: 1888 kbytes total ram: 2196040 kbytes 3. on src host, run "cont" and guest can work normally. 
Expected results: Migration is successful and the guest works well. Additional info: 1. Using the same command line and only setting "param_change=on" for virtio-scsi-pci, migration is successful. 2. Machine types rhel6.2~rhel6.5 all hit this issue. 3. Migrating a RHEL6.5 guest from a RHEL6.5 host to a RHEL7.0 host with "param_change=off" does not hit this issue; migration is successful. 4. Win8-64 guest migration from RHEL6.5 host to RHEL6.5 host with "param_change=off" also hits this issue. 5. Win8-64 guest migration from RHEL7.0 host to RHEL7.0 host with "param_change=off" is successful.
I also tested qemu-kvm-1.5.3-53.el7.x86_64, which also hits this issue, so it's not a regression.
Booting a Win8-64 guest with only a virtio-scsi disk and "param_change=off" set also reproduces this issue. The command line is as follows: /usr/libexec/qemu-kvm -M rhel6.5.0 -cpu Westmere,hv_relaxed -enable-kvm -m 2048 -realtime mlock=off -smp 4,sockets=2,cores=2,threads=1,maxcpus=160 -drive file=/mnt/win8-64.qcow2,if=none,id=drive-system-disk,format=qcow2,cache=none,aio=native,werror=stop,rerror=stop -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x13,indirect_desc=on,event_idx=off,hotplug=on,num_queues=1,max_sectors=512,cmd_per_lun=16,multifunction=on,rombar=64,param_change=off -device scsi-hd,drive=drive-system-disk,bus=scsi0.0,scsi-id=0,lun=0,id=data-disk2,logical_block_size=512,physical_block_size=512,min_io_size=512,opt_io_size=512,discard_granularity=512,ver=fuxc-scsi,serial=fuxc-scsi-serial,removable=off,wwn=0x16,channel=0,scsi-id=2,lun=0,bootindex=0 -vnc :10 -monitor stdio -nodefconfig -net none
Fails with a Win8-64 guest, but not a RHEL-6.5 guest. Intriguing. param_change isn't meant to be used on the command line, it's for the machine type compatibility machinery only. Machine type rhel6.3.0 and older set param_change off. Newer types set param_change on. Could you please re-run your reproducer without setting param_change on the command line? At least machine type rhel6.3.0 and rhel6.5.0, but preferably all of them. At least with your Win8-64 guest, but preferably also with your RHEL-6.5 guest. Running all these tests may take time. Partial test results could help me; so if getting full results take an extra day or more, post partial results as they become available. I guess you'll observe failure for rhel6.3.0 and older, exactly like you did with an explicit param_change=off, and success for newer machine types, exactly like you did with param_change=on. Thanks in advance!
> > Could you please re-run your reproducer without setting param_change on the > command line? At least machine type rhel6.3.0 and rhel6.5.0, but preferably > all of them. At least with your Win8-64 guest, but preferably also with > your RHEL-6.5 guest. > I migrate win8-64 guest and rhel6.5-64 guest from rhel6.5 host to rhel7.0 host without setting param_change on the command line. The results are as following: guest: win8-64 host machine type result rhel6.5->rhel7.0 -M rhel6.1.0 migration failed[1] rhel6.5->rhel7.0 -M rhel6.2.0 migration failed[1] rhel6.5->rhel7.0 -M rhel6.3.0 migration failed[1] rhel6.5->rhel7.0 -M rhel6.4.0 migration success[2] rhel6.5->rhel7.0 -M rhel6.5.0 migration success[2] guest: rhel6.5-64 host machine type result rhel6.5->rhel7.0 -M rhel6.1.0 migration success[2] rhel6.5->rhel7.0 -M rhel6.2.0 migration success[2] rhel6.5->rhel7.0 -M rhel6.3.0 migration success[2] rhel6.5->rhel7.0 -M rhel6.4.0 migration success[2] rhel6.5->rhel7.0 -M rhel6.5.0 migration success[2] [1] the dest qemu-kvm quits with error info (qemu) qemu-kvm: Features 0x6 unsupported. Allowed features: 0x51000002 qemu: warning: error while loading state for instance 0x0 of device '0000:00:13.0/virtio-scsi' load of migration failed [2] there is no error in the dest qemu-kvm. 
in the dest qemu-kvm: (qemu) info status VM status: running The command line used is as following: /usr/libexec/qemu-kvm -M rhelxxx -cpu Westmere,hv_relaxed -enable-kvm -m 2048 -realtime mlock=off -smp 4,sockets=2,cores=2,threads=1,maxcpus=160 -drive file=/mnt/RHEL-Server-6.5-64.qcow2.qcow2,if=none,id=drive-system-disk,format=qcow2,cache=none,aio=native,werror=stop,rerror=stop -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x13,indirect_desc=on,event_idx=off,hotplug=on,num_queues=1,max_sectors=512,cmd_per_lun=16,multifunction=on,rombar=64 -device scsi-hd,drive=drive-system-disk,bus=scsi0.0,scsi-id=0,lun=0,id=data-disk2,logical_block_size=512,physical_block_size=512,min_io_size=512,opt_io_size=512,discard_granularity=512,ver=fuxc-scsi,serial=fuxc-scsi-serial,removable=off,wwn=0x16,channel=0,scsi-id=2,lun=0,bootindex=0 -vnc :10 -monitor stdio -nodefconfig -net none
That was quick; thanks!
To assess the severity of this bug, we need to know whether it bites only with Windows 8. Could you please try one of the failing test cases, say rhel6.5->rhel7.0 -M rhel6.3.0, with a Windows 7 guest?
Thanks Markus. Hi Huding, Can you have a try and update the issue in the bz? Best Regards, Junyi
(In reply to Markus Armbruster from comment #8) > To assess the severity of this bug, we need to know whether it bites only > with Windows 8. Could you please try one of the failing test cases, say > rhel6.5->rhel7.0 -M rhel6.3.0, with a Windows 7 guest? I used the command line from comment 6 to test Windows 7 32-bit and 64-bit guests; they also hit this issue. host guest machine type result rhel6.5->rhel7.0 win7-32 -M rhel6.3.0 migration failed[1] rhel6.5->rhel7.0 win7-64 -M rhel6.3.0 migration failed[1] [1] the dest qemu-kvm quits with error info (qemu) qemu-kvm: Features 0x6 unsupported. Allowed features: 0x51000002 qemu: warning: error while loading state for instance 0x0 of device '0000:00:13.0/virtio-scsi' load of migration failed
virtio-scsi with -rhel6.3.0 isn't supported. Closing BZ.
(In reply to Andrew Cathrow from comment #11) > virtio-scsi with -rhel6.3.0 isn't supported. That's OK from the QE side, provided we can make sure real customers cannot hit this problem. Best Regards, Junyi > > Closing BZ.
Additional information on "isn't supported": virtio-scsi came out of tech preview in RHEL-6.4. We don't support it with older machine types even on newer hosts. Example: Charlie decides to check out the virtio-scsi tech preview in his brand new RHEL-6.3 host. He creates a RHEL-6.3 guest and a Windows 8 guest using it, and everything works fine. Time passes, RHEL-6.4+ comes out. Charlie upgrades his host and guests to the latest software. Everything still works fine. Charlie puts the guests into production. Time passes, RHEL-7.0 comes out. Charlie installs it on a new host, then attempts to migrate his guests. Migrating the RHEL-6 guest succeeds, but the Windows 8 guest fails. This is not supported. Charlie should have upgraded his *machine type* in addition to his host & guest software before putting his guests into production.
Re comment#12: I have no idea whether customers are prone to add virtio-scsi devices to old machine types. I don't know how to best "make sure the real customer can not hit this problem". The migration problem exists because we go out of our way to preserve ABI in an unsupported case: we switch param_change off for machine types rhel6.3.0 and older. Looks like this creates as much of a problem as it solves. And both the created and solved problem are with unsupported usage. Makes me wonder whether compatibility properties for unsupported (machine type, device, property) triples make sense at all.
(In reply to Markus Armbruster from comment #13) > Additional information on "isn't supported": virtio-scsi came out of > tech preview in RHEL-6.4. We don't support it with older machine > types even on newer hosts. > > Example: Charlie decides to check out the virtio-scsi tech preview in > his brand new RHEL-6.3 host. He creates a RHEL-6.3 guest and a > Windows 8 guest using it, and everything works fine. > > Time passes, RHEL-6.4+ comes out. Charlie upgrades his host and > guests to the latest software. Everything still works fine. Charlie > puts the guests into production. > > Time passes, RHEL-7.0 comes out. Charlie installs it on a new host, > then attempts to migrate his guests. Migrating the RHEL-6 guest > succeeds, but the Windows 8 guest fails. > > This is not supported. Charlie should have upgraded his *machine > type* in addition to his host & guest software before putting his > guests into production. Makes sense, and thanks for the extra explanation. Best Regards, Junyi
(In reply to Markus Armbruster from comment #14) > Re comment#12: I have no idea whether customers are prone to add > virtio-scsi devices to old machine types. I don't know how to best > "make sure the real customer can not hit this problem". Can we document it in the release notes or somewhere else? > > The migration problem exists because we go out of our way to preserve > ABI in an unsupported case: we switch param_change off for machine > types rhel6.3.0 and older. Looks like this creates as much of a > problem as it solves. Every coin has two sides. Best Regards, Junyi > And both the created and solved problem are > with unsupported usage. > > Makes me wonder whether compatibility properties for unsupported > (machine type, device, property) triples make sense at all.
The root cause of the bug is still unknown. We might want to figure it out, just to make sure there are no surprises. I'm leaving that decision to Paolo.
Doc text change: s/requires a reboot/requires restarting the VM (guest reboot)/
No, it should be the same virtio-win bug.