Bug 1539537
| Summary: | Guest hangs after hot-unplugging a disk with an NBD backend and an iothread. | ||
|---|---|---|---|
| Product: | Red Hat Enterprise Linux 7 | Reporter: | Yongxue Hong <yhong> |
| Component: | qemu-kvm-rhev | Assignee: | Eric Blake <eblake> |
| Status: | CLOSED DUPLICATE | QA Contact: | aihua liang <aliang> |
| Severity: | medium | Docs Contact: | |
| Priority: | high | ||
| Version: | 7.5 | CC: | aliang, areis, chayang, coli, juzhang, knoel, lolyu, michen, qzhang, stefanha, virt-maint, xuwei, yhong, yilzhang |
| Target Milestone: | rc | ||
| Target Release: | --- | ||
| Hardware: | All | ||
| OS: | Linux | ||
| Whiteboard: | |||
| Fixed In Version: | Doc Type: | If docs needed, set a value | |
| Doc Text: | Story Points: | --- | |
| Clone Of: | Environment: | ||
| Last Closed: | 2018-01-30 05:20:40 UTC | Type: | Bug |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
| Embargoed: | |||
Commands run on the NBD server:
qemu-img create -f qcow2 -o preallocation=falloc rhel-alt-75-ppc64le-virtio-scsi-30G.qcow2 30G
qemu-nbd -f raw rhel-alt-75-ppc64le-virtio-scsi-30G.qcow2 -p 10086 -t -e 10 &
for i in `seq 0 10`; do qemu-img create -f qcow2 -o preallocation=falloc disk-test$i.5G.qcow2 5G; done
for i in `seq 0 10`; do qemu-nbd -f raw disk-test$i.5G.qcow2 -p 2000$i -t -e 10 & done

Yongxue, does this bug also happen on Power8 and x86? Is this a regression? Thanks, Qunfang

It is also reproducible on x86_64.
[root@ibm-x3850x5-07 yhong]# cat guest.sh
/usr/libexec/qemu-kvm \
-name 'avocado-vt-vm1' \
-sandbox off \
-machine pc \
-nodefaults \
-vga cirrus \
-object iothread,id=iothread0 \
-object iothread,id=iothread1 \
-device virtio-scsi-pci,id=scsi0,iothread=iothread0 \
-device virtio-scsi-pci,id=scsi1,iothread=iothread1 \
-device virtio-scsi-pci,id=scsi2 \
-chardev socket,id=qmp_id_qmpmonitor1,path=/var/tmp/monitor-qmpmonitor1,server,nowait \
-mon chardev=qmp_id_qmpmonitor1,mode=control \
-device pvpanic,ioport=0x505,id=iduHVO78 \
-chardev socket,id=serial_id_serial0,path=/var/tmp/serial-serial0,server,nowait \
-device isa-serial,chardev=serial_id_serial0 \
-chardev socket,id=seabioslog_id_20180129-013808-Mj5o0JZF,path=/var/tmp/seabios,server,nowait \
-device isa-debugcon,chardev=seabioslog_id_20180129-013808-Mj5o0JZF,iobase=0x402 \
-drive file=nbd:10.66.9.109:10086,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_system \
-device scsi-hd,bus=scsi0.0,drive=drive_system,id=system,bootindex=0 \
-drive file=nbd:10.66.9.109:20001,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_data1 \
-device scsi-hd,bus=scsi1.0,drive=drive_data1,id=data1 \
-drive file=nbd:10.66.9.109:20002,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_data2 \
-device scsi-hd,bus=scsi2.0,drive=drive_data2,id=data2 \
-device virtio-net-pci,mac=9a:1a:1b:1c:1d:1e,id=idHuzuRL,vectors=4,netdev=idYunEv6,bus=pci.0 \
-netdev tap,id=idYunEv6,vhost=on \
-device ich9-usb-ehci1,id=usb1,addr=0x1d.7,multifunction=on,bus=pci.0 \
-device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \
-m 8192 \
-smp 16,maxcpus=16,cores=8,threads=1,sockets=2 \
-vnc :0 \
-rtc base=utc,clock=host,driftfix=slew \
-boot menu=off,strict=off,order=cdn,once=d \
-enable-kvm \
-monitor stdio
[root@ibm-x3850x5-07 ~]# nc -U /var/tmp/monitor-qmpmonitor1
{"QMP": {"version": {"qemu": {"micro": 0, "minor": 10, "major": 2}, "package": "(qemu-kvm-rhev-2.10.0-18.el7)"}, "capabilities": []}}
{'execute':'qmp_capabilities'}
{"return": {}}
{'execute':'device_del','arguments':{'id':'scsi1'}}
{"return": {}}
Guest hung.
host : 3.10.0-837.el7.x86_64
guest : 3.10.0-837.el7.x86_64
qemu : qemu-kvm-rhev-2.10.0-18.el7.x86_64
GDB TRACE :
(gdb) bt
#0 0x00007fb8cb9d04cd in __lll_lock_wait () at /lib64/libpthread.so.0
#1 0x00007fb8cb9cbdcb in _L_lock_812 () at /lib64/libpthread.so.0
#2 0x00007fb8cb9cbc98 in pthread_mutex_lock () at /lib64/libpthread.so.0
#3 0x00005604f78e0f8f in qemu_mutex_lock (mutex=mutex@entry=0x5604f7ea6ac0 <qemu_global_mutex>)
at util/qemu-thread-posix.c:65
#4 0x00005604f75f89dc in qemu_mutex_lock_iothread () at /usr/src/debug/qemu-2.10.0/cpus.c:1581
#5 0x00005604f78de54f in main_loop_wait (timeout=2728533885) at util/main-loop.c:258
#6 0x00005604f78de54f in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:515
#7 0x00005604f75be8da in main () at vl.c:1937
#8 0x00005604f75be8da in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
at vl.c:4828
(gdb) bt full
#0 0x00007fb8cb9d04cd in __lll_lock_wait () at /lib64/libpthread.so.0
#1 0x00007fb8cb9cbdcb in _L_lock_812 () at /lib64/libpthread.so.0
#2 0x00007fb8cb9cbc98 in pthread_mutex_lock () at /lib64/libpthread.so.0
#3 0x00005604f78e0f8f in qemu_mutex_lock (mutex=mutex@entry=0x5604f7ea6ac0 <qemu_global_mutex>)
at util/qemu-thread-posix.c:65
err = <optimized out>
__PRETTY_FUNCTION__ = "qemu_mutex_lock"
__func__ = "qemu_mutex_lock"
#4 0x00005604f75f89dc in qemu_mutex_lock_iothread () at /usr/src/debug/qemu-2.10.0/cpus.c:1581
#5 0x00005604f78de54f in main_loop_wait (timeout=2728533885) at util/main-loop.c:258
context = 0x5604f9228a50
ret = 1
spin_counter = 0
ret = 1
timeout = 4294967295
timeout_ns = <optimized out>
#6 0x00005604f78de54f in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:515
ret = 1
timeout = 4294967295
timeout_ns = <optimized out>
#7 0x00005604f75be8da in main () at vl.c:1937
i = <optimized out>
snapshot = <optimized out>
---Type <return> to continue, or q <return> to quit---
linux_boot = <optimized out>
initrd_filename = <optimized out>
kernel_filename = <optimized out>
kernel_cmdline = <optimized out>
boot_order = <optimized out>
boot_once = 0x5604f9194aa8 "d"
cyls = <optimized out>
heads = <optimized out>
secs = <optimized out>
translation = <optimized out>
opts = <optimized out>
machine_opts = <optimized out>
hda_opts = <optimized out>
icount_opts = <optimized out>
accel_opts = <optimized out>
olist = <optimized out>
optind = 67
optarg = 0x7ffde9aa94d5 "stdio"
loadvm = <optimized out>
machine_class = 0x0
cpu_model = <optimized out>
vga_model = 0x7ffde9aa8e62 "cirrus"
qtest_chrdev = <optimized out>
---Type <return> to continue, or q <return> to quit---
qtest_log = <optimized out>
pid_file = <optimized out>
incoming = <optimized out>
defconfig = <optimized out>
userconfig = <optimized out>
nographic = <optimized out>
display_type = <optimized out>
display_remote = <optimized out>
log_mask = <optimized out>
log_file = <optimized out>
trace_file = <optimized out>
maxram_size = <optimized out>
ram_slots = <optimized out>
vmstate_dump_file = <optimized out>
main_loop_err = 0x0
err = 0x0
list_data_dirs = <optimized out>
bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffde9aa6980}
__func__ = "main"
__FUNCTION__ = "main"
#8 0x00005604f75be8da in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
at vl.c:4828
i = <optimized out>
---Type <return> to continue, or q <return> to quit---
snapshot = <optimized out>
linux_boot = <optimized out>
initrd_filename = <optimized out>
kernel_filename = <optimized out>
kernel_cmdline = <optimized out>
boot_order = <optimized out>
boot_once = 0x5604f9194aa8 "d"
cyls = <optimized out>
heads = <optimized out>
secs = <optimized out>
translation = <optimized out>
opts = <optimized out>
machine_opts = <optimized out>
hda_opts = <optimized out>
icount_opts = <optimized out>
accel_opts = <optimized out>
olist = <optimized out>
optind = 67
optarg = 0x7ffde9aa94d5 "stdio"
loadvm = <optimized out>
machine_class = 0x0
cpu_model = <optimized out>
vga_model = 0x7ffde9aa8e62 "cirrus"
---Type <return> to continue, or q <return> to quit---
qtest_chrdev = <optimized out>
qtest_log = <optimized out>
pid_file = <optimized out>
incoming = <optimized out>
defconfig = <optimized out>
userconfig = <optimized out>
nographic = <optimized out>
display_type = <optimized out>
display_remote = <optimized out>
log_mask = <optimized out>
log_file = <optimized out>
trace_file = <optimized out>
maxram_size = <optimized out>
ram_slots = <optimized out>
vmstate_dump_file = <optimized out>
main_loop_err = 0x0
err = 0x0
list_data_dirs = <optimized out>
bdo_queue = {sqh_first = 0x0, sqh_last = 0x7ffde9aa6980}
__func__ = "main"
__FUNCTION__ = "main"
*** This bug has been marked as a duplicate of bug 1487473 ***
Description of problem:
The guest hangs after hot-unplugging a disk that has an NBD backend and an iothread.

Version-Release number of selected component (if applicable):
host: 4.14.0-33.el7a.ppc64le
guest: 4.14.0-33.el7a.ppc64le
qemu: qemu-kvm-rhev-2.10.0-18.el7.ppc64le

How reproducible: 100%

Steps to Reproduce:
1. Boot a guest with an NBD backend and an iothread.
e.g:
[root@c155f3-u23 commands]# cat guest-9332.sh
/usr/libexec/qemu-kvm \
-name 'guest' \
-machine pseries-rhel7.5.0 \
-m 16G \
-nodefaults \
-smp 8,cores=4,threads=2,sockets=1 \
-boot order=cdn,once=d,menu=off,strict=off \
-device nec-usb-xhci,id=xhci0 \
-device usb-tablet,id=usb-tablet0 \
-device usb-kbd,id=usb-kbd0 \
-chardev socket,id=qmp_id_qmpmonitor,path=/var/tmp/qmp-cmd-monitor-yhong,server,nowait \
-mon chardev=qmp_id_qmpmonitor,mode=control \
-chardev socket,id=serial_id_serial,path=/var/tmp/serial-yhong,server,nowait \
-device spapr-vty,reg=0x30000000,chardev=serial_id_serial \
-enable-kvm \
-object iothread,id=iothread0 \
-object iothread,id=iothread1 \
-device virtio-scsi-pci,id=scsi0,iothread=iothread0 \
-device virtio-scsi-pci,id=scsi1,iothread=iothread1 \
-device virtio-scsi-pci,id=scsi2 \
-drive file=nbd:10.0.1.8:10086,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_system \
-device scsi-hd,bus=scsi0.0,drive=drive_system,id=system,bootindex=0 \
-drive file=nbd:10.0.1.8:20001,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_data1 \
-device scsi-hd,bus=scsi1.0,drive=drive_data1,id=data1 \
-drive file=nbd:10.0.1.8:20002,format=qcow2,aio=native,if=none,cache=none,media=disk,werror=stop,rerror=stop,id=drive_data2 \
-device scsi-hd,bus=scsi2.0,drive=drive_data2,id=data2 \
-netdev tap,id=hostnet0,script=/etc/qemu-ifup \
-device virtio-net-pci,netdev=hostnet0,id=virtio-net-pci0,mac=40:f2:e9:5d:9c:03 \
-monitor stdio \

2. Hot-unplug drive_data1.
e.g:
[root@c155f3-u23 ~]# nc -U /var/tmp/qmp-cmd-monitor-yhong
{"QMP": {"version": {"qemu": {"micro": 0, "minor": 10, "major": 2}, "package": "(qemu-kvm-rhev-2.10.0-18.el7)"}, "capabilities": []}}
{'execute':'qmp_capabilities'}
{"return": {}}
{'execute':'device_del','arguments':{'id':'scsi1'}}
{"return": {}}

Guest console output:
[root@localhost ~]#
[ 45.273116] sd 1:0:0:0: [sdb] Synchronizing SCSI cache
[ 45.358264] iommu: Removing device 0000:00:02.0 from group 0

Actual results:
The guest hangs.

Expected results:
The guest keeps running normally.

Additional info:
Hot-unplugging drive_data2, which is not attached to an iothread, succeeds:
{'execute':'device_del','arguments':{'id':'scsi2'}}
{"return": {}}
{"timestamp": {"seconds": 1517140053, "microseconds": 984937}, "event": "DEVICE_DELETED", "data": {"device": "data2", "path": "/machine/peripheral/data2"}}
{"timestamp": {"seconds": 1517140053, "microseconds": 985022}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/scsi2/virtio-backend"}}
{"timestamp": {"seconds": 1517140053, "microseconds": 993100}, "event": "DEVICE_DELETED", "data": {"device": "scsi2", "path": "/machine/peripheral/scsi2"}}
The guest keeps running normally.