Hide Forgot
+++ This bug was initially created as a clone of Bug #2009236 +++ Description of problem: Qemu coredump when backup with x-perf Version-Release number of selected component (if applicable): kernel version:4.18.0-345.1.el8.x86_64 qemu-kvm version:qemu-kvm-6.1.0-1.module+el8.6.0+12721+8d053ff2 How reproducible: 100% Steps to Reproduce: 1.Start guest with qemu cmds: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35,memory-backend=mem-machine_mem \ -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \ -device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x2 \ -m 30720 \ -object memory-backend-ram,size=30720M,id=mem-machine_mem \ -smp 10,maxcpus=10,cores=5,threads=1,dies=1,sockets=2 \ -cpu 'Cascadelake-Server-noTSX',+kvm_pv_unhalt \ -chardev socket,server=on,id=qmp_id_qmpmonitor1,wait=off,path=/tmp/monitor-qmpmonitor1-20210927-235344-uQJJ6aFc \ -mon chardev=qmp_id_qmpmonitor1,mode=control \ -chardev socket,server=on,id=qmp_id_catch_monitor,wait=off,path=/tmp/monitor-catch_monitor-20210927-235344-uQJJ6aFc \ -mon chardev=qmp_id_catch_monitor,mode=control \ -device pvpanic,ioport=0x505,id=idmerT3z \ -chardev socket,server=on,id=chardev_serial0,wait=off,path=/tmp/serial-serial0-20210927-235344-uQJJ6aFc \ -device isa-serial,id=serial0,chardev=chardev_serial0 \ -chardev socket,id=seabioslog_id_20210927-235344-uQJJ6aFc,path=/tmp/seabios-20210927-235344-uQJJ6aFc,server=on,wait=off \ -device isa-debugcon,chardev=seabioslog_id_20210927-235344-uQJJ6aFc,iobase=0x402 \ -device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \ -device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \ -blockdev 
node-name=file_image1,driver=file,auto-read-only=on,discard=unmap,aio=threads,filename=/home/kvm_autotest_root/images/rhel850-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,read-only=off,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \ -device virtio-net-pci,mac=9a:c0:90:a7:2f:40,id=idjZ1FPm,netdev=id4rOUG1,bus=pcie-root-port-3,addr=0x0 \ -netdev tap,id=id4rOUG1,vhost=on \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \ -monitor stdio \ -qmp tcp:0:3000,server=on,wait=off \ 2. Create backup target node {'execute':'blockdev-create','arguments':{'options': {'driver':'file','filename':'/root/sn$i','size':21474836480},'job-id':'job1'}} {'execute':'blockdev-add','arguments':{'driver':'file','node-name':'drive_sn$i','filename':'/root/sn$i'}} {'execute':'blockdev-create','arguments':{'options': {'driver': 'qcow2','file':'drive_sn$i','size':21474836480},'job-id':'job2'}} {'execute':'blockdev-add','arguments':{'driver':'qcow2','node-name':'sn$i','file':'drive_sn$i'}} {'execute':'job-dismiss','arguments':{'id':'job1'}} {'execute':'job-dismiss','arguments':{'id':'job2'}} 3.Do backup with x-perf option {"execute": "blockdev-backup", "arguments": {"device": "drive_image1","job-id": "j1", "sync": "full", "target": "sn1","x-perf":{"use-copy-range":true,"max-workers":4294967296,"max-chunk":4294967296}}} {"timestamp": {"seconds": 1632990503, "microseconds": 785037}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "j1"}} {"timestamp": {"seconds": 1632990503, "microseconds": 785178}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "j1"}} {"timestamp": {"seconds": 1632990503, 
"microseconds": 785256}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "j1"}} {"timestamp": {"seconds": 1632990503, "microseconds": 785327}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "j1"}} Ncat: Connection reset by peer. Actual results: After step3, qemu coredump. (qemu) qemu-kvm: ../block/aio_task.c:63: aio_task_pool_wait_one: Assertion `pool->busy_tasks > 0' failed. src.txt: line 40: 68683 Aborted (core dumped) /usr/libexec/qemu-kvm -name 'avocado-vt-vm1' -sandbox on -machine q35,memory-backend=mem-machine_mem -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 .. <pre>(gdb) bt #0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 #1 0x00007fc695299db5 in __GI_abort () at abort.c:79 #2 0x00007fc695299c89 in __assert_fail_base (fmt=0x7fc695402698 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x559cc75e9a44 "pool->busy_tasks > 0", file=0x559cc75e9a30 "../block/aio_task.c", line=63, function=<optimized out>) at assert.c:92 #3 0x00007fc6952a7a76 in __GI___assert_fail (assertion=assertion@entry=0x559cc75e9a44 "pool->busy_tasks > 0", file=file@entry=0x559cc75e9a30 "../block/aio_task.c", line=line@entry=63, function=function@entry=0x559cc75e9ad0 <__PRETTY_FUNCTION__.16996> "aio_task_pool_wait_one") at assert.c:101 #4 0x0000559cc73f3511 in aio_task_pool_wait_one (pool=<optimized out>) at ../block/aio_task.c:63 #5 0x0000559cc74457e8 in block_copy_task_run (task=0x559ccac517b0, pool=0x559cc9a43140) at ../block/block-copy.c:398 #6 block_copy_dirty_clusters (call_state=0x559cca6492e0) at ../block/block-copy.c:730 #7 block_copy_common (call_state=0x559cca6492e0) at ../block/block-copy.c:781 #8 0x0000559cc74deb43 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at ../util/coroutine-ucontext.c:173 #9 0x00007fc6952c4f50 in ?? () at ../sysdeps/unix/sysv/linux/x86_64/__start_context.S:91 from /usr/lib64/libc-2.28.so #10 0x00007fff85430220 in ?? 
() #11 0x0000000000000000 in ?? ()</pre> Expected results: Backup can be executed successfully. Additional info: Will add core dump info later. --- Additional comment from aihua liang on 2021-09-30 08:56:23 UTC --- qemu-kvm-6.0.0-30.module+el8.5.0+12586+476da3e1 also hits this issue.
Test Env in RHEL9: kernel version:5.14.0-3.el9.x86_64 qemu-kvm version:qemu-kvm-6.1.0-2.el9
(In reply to aihua liang from comment #0) The problem is an integer overflow related to the `max-workers` parameter: > 3.Do backup with x-perf option > {"execute": "blockdev-backup", "arguments": {"device": > "drive_image1","job-id": "j1", "sync": "full", "target": > "sn1","x-perf":{"use-copy-range":true,"max-workers":4294967296,"max-chunk": > 4294967296}}} QAPI generates a structure and stores the `max-workers` value in an `int64_t` variable, while in the code (block/aio_task.c - max_busy_tasks) an `int` is used. So the maximum valid value should be INT_MAX (2147483647). I'll send a patch adding a check for that with an error message. @aliang@redhat.com Can you check that everything is okay by using `max-workers` values up to 2147483647? I did a quick check and it looks okay in my environment if `max-workers` is between 1 and 2147483647.
Fixes posted upstream: https://lists.nongnu.org/archive/html/qemu-devel/2021-10/msg00934.html
Tested backup with the following max-workers values; all work ok. "max-workers":2147483647,"max-chunk":2147483647 "max-workers":65536,"max-chunk":65536 "max-workers":1,"max-chunk":1048576 "max-workers":1,"max-chunk":0 So it works ok between 1 and 2147483647.
Fix merged upstream: https://gitlab.com/qemu-project/qemu/-/commit/8fc898ce0b3e7fea8c7c2a8d8977f2a9b77ecebf
Test with qemu-kvm-6.2.0-1.el9, don't hit this issue any more. 1.Start guest with qemu cmds: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35,memory-backend=mem-machine_mem \ -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \ -device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x2 \ -m 30720 \ -object memory-backend-ram,size=30720M,id=mem-machine_mem \ -smp 10,maxcpus=10,cores=5,threads=1,dies=1,sockets=2 \ -cpu 'Cascadelake-Server-noTSX',+kvm_pv_unhalt \ -chardev socket,server=on,id=qmp_id_qmpmonitor1,wait=off,path=/tmp/monitor-qmpmonitor1-20210927-235344-uQJJ6aFc \ -mon chardev=qmp_id_qmpmonitor1,mode=control \ -chardev socket,server=on,id=qmp_id_catch_monitor,wait=off,path=/tmp/monitor-catch_monitor-20210927-235344-uQJJ6aFc \ -mon chardev=qmp_id_catch_monitor,mode=control \ -device pvpanic,ioport=0x505,id=idmerT3z \ -chardev socket,server=on,id=chardev_serial0,wait=off,path=/tmp/serial-serial0-20210927-235344-uQJJ6aFc \ -device isa-serial,id=serial0,chardev=chardev_serial0 \ -chardev socket,id=seabioslog_id_20210927-235344-uQJJ6aFc,path=/tmp/seabios-20210927-235344-uQJJ6aFc,server=on,wait=off \ -device isa-debugcon,chardev=seabioslog_id_20210927-235344-uQJJ6aFc,iobase=0x402 \ -device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \ -device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \ -blockdev node-name=file_image1,driver=file,auto-read-only=on,discard=unmap,aio=threads,filename=/home/kvm_autotest_root/images/rhel900-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev 
node-name=drive_image1,driver=qcow2,read-only=off,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \ -device virtio-net-pci,mac=9a:c0:90:a7:2f:40,id=idjZ1FPm,netdev=id4rOUG1,bus=pcie-root-port-3,addr=0x0 \ -netdev tap,id=id4rOUG1,vhost=on \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \ -monitor stdio \ -qmp tcp:0:3000,server=on,wait=off \ 2. Create backup target node {'execute':'blockdev-create','arguments':{'options': {'driver':'file','filename':'/root/sn1','size':21474836480},'job-id':'job1'}} {'execute':'blockdev-add','arguments':{'driver':'file','node-name':'drive_sn1','filename':'/root/sn1'}} {'execute':'blockdev-create','arguments':{'options': {'driver': 'qcow2','file':'drive_sn1','size':21474836480},'job-id':'job2'}} {'execute':'blockdev-add','arguments':{'driver':'qcow2','node-name':'sn$i','file':'drive_sn$i'}} {'execute':'job-dismiss','arguments':{'id':'job1'}} {'execute':'job-dismiss','arguments':{'id':'job2'}} 3.Do backup with x-perf option {"execute": "blockdev-backup", "arguments": {"device": "drive_image1","job-id": "j1", "sync": "full", "target": "sn1","x-perf":{"use-copy-range":true,"max-workers":4294967296,"max-chunk":4294967296}}} In step3, backup can't start with error info: {"error": {"class": "GenericError", "desc": "max-workers must be between 1 and 2147483647"}}
QE bot (pre-verify): Set 'Verified:Tested,SanityOnly' as gating/tier1 test pass.
As per comment 6 and comment 7, set the bug's status to "VERIFIED".
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (new packages: qemu-kvm), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2022:2307