Description of problem: Qemu hang when do snapshots on multi-disks with one job's 'job-id' missed in transaction mode Version-Release number of selected component (if applicable): kernel version:4.18.0-160.el8.x86_64 qemu-kvm version:qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64 How reproducible: 100% Steps to Reproduce: 1.Start guest with qemu cmds: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x1 \ -m 7168 \ -smp 4,maxcpus=4,cores=2,threads=1,dies=1,sockets=2 \ -cpu 'Skylake-Client',+kvm_pv_unhalt \ -chardev socket,id=qmp_id_qmpmonitor1,path=/var/tmp/monitor-qmpmonitor1-20191210-025743-Q0JzJpKT,server,nowait \ -mon chardev=qmp_id_qmpmonitor1,mode=control \ -chardev socket,id=qmp_id_catch_monitor,path=/var/tmp/monitor-catch_monitor-20191210-025743-Q0JzJpKT,server,nowait \ -mon chardev=qmp_id_catch_monitor,mode=control \ -device pvpanic,ioport=0x505,id=idDU2Q2E \ -chardev socket,server,path=/var/tmp/serial-serial0-20191210-025743-Q0JzJpKT,id=chardev_serial0,nowait \ -device isa-serial,id=serial0,chardev=chardev_serial0 \ -chardev socket,id=seabioslog_id_20191210-025743-Q0JzJpKT,path=/var/tmp/seabios-20191210-025743-Q0JzJpKT,server,nowait \ -device isa-debugcon,chardev=seabioslog_id_20191210-025743-Q0JzJpKT,iobase=0x402 \ -device pcie-root-port,id=pcie.0-root-port-2,slot=2,chassis=2,addr=0x2,bus=pcie.0,multifunction=on \ -device qemu-xhci,id=usb1,bus=pcie.0-root-port-2,addr=0x0 \ -object iothread,id=iothread0 \ -object iothread,id=iothread1 \ -device pcie-root-port,id=pcie.0-root-port-3,slot=3,chassis=3,addr=0x2.0x1,bus=pcie.0 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,iothread=iothread0,bus=pcie.0-root-port-3,addr=0x0 \ -blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/kvm_autotest_root/images/rhel820-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -device pcie-root-port,id=pcie.0-root-port-4,slot=4,chassis=4,addr=0x2.0x2,bus=pcie.0 \ -blockdev node-name=file_data1,driver=file,aio=threads,filename=/home/data.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_data1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_data1 \ -device virtio-blk-pci,id=data1,drive=drive_data1,write-cache=on,bus=pcie.0-root-port-4,iothread=iothread1 \ -device pcie-root-port,id=pcie.0-root-port-5,slot=5,chassis=5,addr=0x2.0x3,bus=pcie.0 \ -device virtio-net-pci,mac=9a:9b:1d:13:61:86,id=idKg9AzR,netdev=idxDM2m8,bus=pcie.0-root-port-5,addr=0x0 \ -netdev tap,id=idxDM2m8,vhost=on \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -monitor stdio \ -device pcie-root-port,id=pcie_extra_root_port_0,slot=6,chassis=6,addr=0x2.0x4,bus=pcie.0 \ -qmp tcp:0:3000,server,nowait \ 2. Create backup targets:sn1,sn2 {'execute':'blockdev-create','arguments':{'options': {'driver':'file','filename':'/root/sn$i','size':21474836480},'job-id':'job1'}} {'execute':'blockdev-add','arguments':{'driver':'file','node-name':'drive_sn$i','filename':'/root/sn$i'}} {'execute':'blockdev-create','arguments':{'options': {'driver': 'qcow2','file':'drive_sn$i','size':21474836480},'job-id':'job2'}} {'execute':'blockdev-add','arguments':{'driver':'qcow2','node-name':'sn$i','file':'drive_sn$i'}} {'execute':'job-dismiss','arguments':{'id':'job1'}} {'execute':'job-dismiss','arguments':{'id':'job2'}} 3.Do full backup on drive_image1 and drive_data1, with 'job-id' of one job missed. { "execute": "transaction", "arguments": { "actions": [ {"type": "blockdev-backup", "data": { "device": "drive_image1", "target": "sn1", "sync": "full", "job-id":"j1" } }, {"type": "blockdev-backup", "data": { "device": "drive_data1", "target": "sn2", "sync": "full"}}]}} {"timestamp": {"seconds": 1576050192, "microseconds": 447444}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "j1"}} Actual results: After step3, qemu hang. # gdb -p 16285 (gdb) bt #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=<optimized out>, timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:77 #2 0x000055e743386e09 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=<optimized out>) at util/qemu-timer.c:336 #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) at util/aio-posix.c:669 #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 #6 0x000055e7432be58e in bdrv_delete (bs=<optimized out>) at block.c:4262 #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 #9 0x000055e74331461f in backup_job_create (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 #10 0x000055e74315bc52 in do_backup_common (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) at blockdev.c:3580 #11 0x000055e74315c37c in do_blockdev_backup (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 --Type <RET> for more, q to quit, c to continue without paging-- #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, errp=0x7ffddfd1f018) at blockdev.c:1885 #13 0x000055e743160152 in qmp_transaction (dev_list=<optimized out>, has_props=<optimized out>, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 #14 0x000055e743287ff5 in qmp_marshal_transaction (args=<optimized out>, ret=<optimized out>, errp=0x7ffddfd1f0f8) at qapi/qapi-commands-transaction.c:44 #15 0x000055e74333de6c in do_qmp_dispatch (errp=0x7ffddfd1f0f0, allow_oob=<optimized out>, request=<optimized out>, cmds=0x55e743c28d60 <qmp_commands>) at qapi/qmp-dispatch.c:132 #16 0x000055e74333de6c in qmp_dispatch (cmds=0x55e743c28d60 <qmp_commands>, request=<optimized out>, allow_oob=<optimized out>) at qapi/qmp-dispatch.c:175 #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=<optimized out>) at monitor/qmp.c:145 #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=<optimized out>) at monitor/qmp.c:234 #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 #22 0x000055e743385742 in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:260 #23 0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=<optimized out>) at util/main-loop.c:242 --Type <RET> for more, q to quit, c to continue without paging-- #27 0x000055e743387d08 in main_loop_wait (nonblocking=<optimized out>) at util/main-loop.c:518 #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 #29 0x000055e743016a72 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4504 Expected results: According to doc "qapi/block-core.json", the job should executed successfully with device name.nn # @BackupCommon: # # @job-id: identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) Additional info:
Correct bug's title to "Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode"
It's a hang issue, set bug's priority to "high".
Test with data plane disabled, job will failed with invalid job id error: { "execute": "transaction", "arguments": { "actions": [ {"type": "blockdev-backup", "data": { "device": "drive_image1", "target": "sn1", "sync": "full", "job-id":"j1" } }, {"type": "blockdev-backup", "data": { "device": "drive_data1", "target": "sn2", "sync": "full"}}]}} {"timestamp": {"seconds": 1576051775, "microseconds": 52352}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "j1"}} {"timestamp": {"seconds": 1576051775, "microseconds": 70253}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "j1"}} {"timestamp": {"seconds": 1576051775, "microseconds": 70312}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "j1"}} {"timestamp": {"seconds": 1576051775, "microseconds": 70343}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "j1"}} {"error": {"class": "GenericError", "desc": "Invalid job ID ''"}}
QEMU has been recently split into sub-components and as a one-time operation to avoid breakage of tools, we are setting the QEMU sub-component of this BZ to "General". Please review and change the sub-component if necessary the next time you review this BZ. Thanks
Verified on qemu-kvm-4.2.0-10.module+el8.2.0+5740+c3dff59e,the problem has been resolved, so set bug's status to "Verified". Test steps: 1.Start guest with qemu cmds: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x1 \ -m 7168 \ -smp 4,maxcpus=4,cores=2,threads=1,dies=1,sockets=2 \ -cpu 'Skylake-Client',+kvm_pv_unhalt \ -chardev socket,id=qmp_id_qmpmonitor1,path=/var/tmp/monitor-qmpmonitor1-20191210-025743-Q0JzJpKT,server,nowait \ -mon chardev=qmp_id_qmpmonitor1,mode=control \ -chardev socket,id=qmp_id_catch_monitor,path=/var/tmp/monitor-catch_monitor-20191210-025743-Q0JzJpKT,server,nowait \ -mon chardev=qmp_id_catch_monitor,mode=control \ -device pvpanic,ioport=0x505,id=idDU2Q2E \ -chardev socket,server,path=/var/tmp/serial-serial0-20191210-025743-Q0JzJpKT,id=chardev_serial0,nowait \ -device isa-serial,id=serial0,chardev=chardev_serial0 \ -chardev socket,id=seabioslog_id_20191210-025743-Q0JzJpKT,path=/var/tmp/seabios-20191210-025743-Q0JzJpKT,server,nowait \ -device isa-debugcon,chardev=seabioslog_id_20191210-025743-Q0JzJpKT,iobase=0x402 \ -device pcie-root-port,id=pcie.0-root-port-2,slot=2,chassis=2,addr=0x2,bus=pcie.0,multifunction=on \ -device qemu-xhci,id=usb1,bus=pcie.0-root-port-2,addr=0x0 \ -object iothread,id=iothread0 \ -object iothread,id=iothread1 \ -device pcie-root-port,id=pcie.0-root-port-3,slot=3,chassis=3,addr=0x2.0x1,bus=pcie.0 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,iothread=iothread0,bus=pcie.0-root-port-3,addr=0x0 \ -blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/kvm_autotest_root/images/rhel820-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -device pcie-root-port,id=pcie.0-root-port-4,slot=4,chassis=4,addr=0x2.0x2,bus=pcie.0 \ -blockdev node-name=file_data1,driver=file,aio=threads,filename=/home/data.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_data1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_data1 \ -device virtio-blk-pci,id=data1,drive=drive_data1,write-cache=on,bus=pcie.0-root-port-4,iothread=iothread1 \ -device pcie-root-port,id=pcie.0-root-port-5,slot=5,chassis=5,addr=0x2.0x3,bus=pcie.0 \ -device virtio-net-pci,mac=9a:9b:1d:13:61:86,id=idKg9AzR,netdev=idxDM2m8,bus=pcie.0-root-port-5,addr=0x0 \ -netdev tap,id=idxDM2m8,vhost=on \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -monitor stdio \ -device pcie-root-port,id=pcie_extra_root_port_0,slot=6,chassis=6,addr=0x2.0x4,bus=pcie.0 \ -qmp tcp:0:3000,server,nowait \ 2. Create backup targets:sn1,sn2 {'execute':'blockdev-create','arguments':{'options': {'driver':'file','filename':'/root/sn$i','size':21474836480},'job-id':'job1'}} {'execute':'blockdev-add','arguments':{'driver':'file','node-name':'drive_sn$i','filename':'/root/sn$i'}} {'execute':'blockdev-create','arguments':{'options': {'driver': 'qcow2','file':'drive_sn$i','size':21474836480},'job-id':'job2'}} {'execute':'blockdev-add','arguments':{'driver':'qcow2','node-name':'sn$i','file':'drive_sn$i'}} {'execute':'job-dismiss','arguments':{'id':'job1'}} {'execute':'job-dismiss','arguments':{'id':'job2'}} 3.Do full backup on drive_image1 and drive_data1, with 'job-id' of one job missed. { "execute": "transaction", "arguments": { "actions": [ {"type": "blockdev-backup", "data": { "device": "drive_image1", "target": "sn1", "sync": "full", "job-id":"j1" } }, {"type": "blockdev-backup", "data": { "device": "drive_data1", "target": "sn2", "sync": "full"}}]}} {"timestamp": {"seconds": 1582080998, "microseconds": 912816}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "j1"}} {"timestamp": {"seconds": 1582080998, "microseconds": 913089}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "j1"}} {"timestamp": {"seconds": 1582080998, "microseconds": 913139}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "j1"}} {"timestamp": {"seconds": 1582080998, "microseconds": 913189}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "j1"}} {"error": {"class": "GenericError", "desc": "Invalid job ID ''"}}
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:2017