Description of problem: Within a cluster with two hosts, one with TSC scaling enabled and the other not TSC-scaling capable, if a VM is migrated away from the TSC-scaling-enabled host, the migration apparently finishes correctly, but the VM then goes to 100% CPU usage and reboots a few seconds later. The two hosts also have different TSC frequencies. Version-Release number of selected component (if applicable): - Manager: ovirt-engine-4.5.0.7-0.9 - Hosts: ovirt-host-4.5.0-3 libvirt-8.0.0-5 How reproducible: Always Steps to Reproduce: 1. Set up a cluster with at least one host with TSC scaling enabled and another without it, with a TSC frequency difference between them. 2. Migrate a VM from the TSC-scaling-enabled host to the one without TSC scaling. 3. Once the migration is finished, the VM shows 100% CPU usage and reboots a few seconds later. Actual results: - The VM is rebooted; after that, the VM works fine on the destination host. Expected results: - The VM continues working without issues on the destination host, regardless of whether that host is TSC-scaling capable. Additional info: - The VM's libvirt log can show "qemu-kvm: warning: TSC frequency mismatch between VM (2194843 kHz) and host (2397221 kHz), and TSC scaling unavailable" - The problem seems to have appeared after the last upgrade to RHV 4.4 SP1.
It looks like a QEMU problem, but let's gather a bit more information first: - Raul, could you please provide a qemu-kvm command line of a VM exposing this problem? - What is the exact QEMU version? - Do you have any idea why the reboot occurs? Does the guest OS crash or is it initiated by QEMU? - Is there anything interesting in the QEMU log (besides the "TSC frequency mismatch" warning, which is expected)? - Is there anything else that could help the QEMU developers identify the problem?
Also, what's the kernel version? I wonder whether this could be related to Bug 2079311 and actually not be related to TSC frequencies at all, or, conversely, whether that other bug could itself be related to TSC frequencies.
Hello Milan, - We have two VMs stuck with CPU at 100%; below are the QEMU process parameters [1] [2] - Installed qemu package: qemu-kvm-core-6.2.0-11.module+el8.6.0+14712+f96656d3.x86_64 - No - No - We do not observe the issue on RHEL 8.5, so is it a regression? RHEL 8.5 kernel version: 4.18.0-348.el8.x86_64 - We observe the bug on RHEL 7.9 and RHEL 8.6, kernel versions 3.10.0-1160.el7.x86_64 and 4.18.0-372.9.1.el8.x86_64. [1] /usr/libexec/qemu-kvm -name guest=josgutie-rhel79,debug-threads=on -S -object {"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-63-josgutie-rhel79/master-key.aes"} -machine pc-i440fx-rhel7.6.0,usb=off,dump-guest-core=off -accel kvm -cpu Haswell-noTSX,spec-ctrl=on,ssbd=on,md-clear=on,vmx=on,vme=on,f16c=on,rdrand=on,hypervisor=on,arat=on,xsaveopt=on,abm=on -m size=1048576k,slots=16,maxmem=4194304k -overcommit mem-lock=off -smp 1,maxcpus=16,sockets=16,dies=1,cores=1,threads=1 -object {"qom-type":"iothread","id":"iothread1"} -numa node,nodeid=0,cpus=0-15,mem=1024 -uuid bba12c3d-c16f-4b05-8f4c-87bb6095b75b -smbios type=1,manufacturer=Red Hat,product=RHEL,version=8.6-0.1.el8,serial=4c4c4544-0052-4e10-8033-b1c04f535032,uuid=bba12c3d-c16f-4b05-8f4c-87bb6095b75b,sku=8.6.0,family=RHV -smbios type=2,manufacturer=Red Hat,product=RHEL-AV -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=60,server=on,wait=off -mon chardev=charmonitor,id=monitor,mode=control -rtc base=2022-07-11T15:28:58,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -global PIIX4_PM.disable_s3=1 -global PIIX4_PM.disable_s4=1 -boot strict=on -device piix3-usb-uhci,id=ua-b49c9edb-ed6d-471a-b8c8-1ce2ba4df1a2,bus=pci.0,addr=0x1.0x2 -device virtio-scsi-pci,iothread=iothread1,id=ua-1ecebed3-4fe7-4fdd-a1cd-dcb195b2eab8,bus=pci.0,addr=0x5 -device virtio-serial-pci,id=ua-e7562c0b-76e3-49ea-9920-f4b014a69e37,max_ports=16,bus=pci.0,addr=0x6 -device 
ide-cd,bus=ide.1,unit=0,id=ua-e2135dca-b664-4966-b8e4-c2813feb1684,werror=report,rerror=report -blockdev {"driver":"host_device","filename":"/rhev/data-center/mnt/blockSD/6814fac1-2e9a-4ab3-bbfe-02a14c2049fe/images/80953897-6ef8-4641-bb34-066e91729627/3037d7ea-2567-42cb-9c7f-62ea949b1bc2","aio":"native","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":null} -device scsi-hd,bus=ua-1ecebed3-4fe7-4fdd-a1cd-dcb195b2eab8.0,channel=0,scsi-id=0,lun=0,device_id=80953897-6ef8-4641-bb34-066e91729627,drive=libvirt-1-format,id=ua-80953897-6ef8-4641-bb34-066e91729627,bootindex=1,write-cache=on,serial=80953897-6ef8-4641-bb34-066e91729627,werror=stop,rerror=stop -netdev tap,fd=61,id=hostua-27ecff61-93ea-432c-af92-2b80a8ccbcaa,vhost=on,vhostfd=66 -device virtio-net-pci,host_mtu=1500,netdev=hostua-27ecff61-93ea-432c-af92-2b80a8ccbcaa,id=ua-27ecff61-93ea-432c-af92-2b80a8ccbcaa,mac=00:1a:4a:16:01:0a,bus=pci.0,addr=0x3 -chardev socket,id=charchannel0,fd=57,server=on,wait=off -device virtserialport,bus=ua-e7562c0b-76e3-49ea-9920-f4b014a69e37.0,nr=1,chardev=charchannel0,id=channel0,name=ovirt-guest-agent.0 -chardev socket,id=charchannel1,fd=59,server=on,wait=off -device virtserialport,bus=ua-e7562c0b-76e3-49ea-9920-f4b014a69e37.0,nr=2,chardev=charchannel1,id=channel1,name=org.qemu.guest_agent.0 -chardev spicevmc,id=charchannel2,name=vdagent -device virtserialport,bus=ua-e7562c0b-76e3-49ea-9920-f4b014a69e37.0,nr=3,chardev=charchannel2,id=channel2,name=com.redhat.spice.0 -device usb-tablet,id=input0,bus=ua-b49c9edb-ed6d-471a-b8c8-1ce2ba4df1a2.0,port=1 -audiodev {"id":"audio1","driver":"none"} -spice 
port=5909,tls-port=5913,addr=10.37.192.44,x509-dir=/etc/pki/vdsm/libvirt-spice,tls-channel=main,tls-channel=display,tls-channel=inputs,tls-channel=cursor,tls-channel=playback,tls-channel=record,tls-channel=smartcard,tls-channel=usbredir,seamless-migration=on -vnc 10.37.192.44:14,password=on,audiodev=audio1 -k en-us -device qxl-vga,id=ua-96c5d2ef-3ccc-4183-b95a-48582031d0d4,ram_size=67108864,vram_size=33554432,vram64_size_mb=0,vgamem_mb=16,max_outputs=1,bus=pci.0,addr=0x2 -device intel-hda,id=ua-9f786cfa-3501-488a-a149-8ed944d0ec15,bus=pci.0,addr=0x4 -device hda-duplex,id=ua-9f786cfa-3501-488a-a149-8ed944d0ec15-codec0,bus=ua-9f786cfa-3501-488a-a149-8ed944d0ec15.0,cad=0,audiodev=audio1 -incoming defer -device virtio-balloon-pci,id=ua-c49101e4-32a3-4625-ad4b-c50a44e83074,bus=pci.0,addr=0x7 -object {"qom-type":"rng-random","id":"objua-be294088-e8e5-40e9-9600-f1b324ed942c","filename":"/dev/urandom"} -device virtio-rng-pci,rng=objua-be294088-e8e5-40e9-9600-f1b324ed942c,id=ua-be294088-e8e5-40e9-9600-f1b324ed942c,bus=pci.0,addr=0x8 -device vmcoreinfo -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on [2] /usr/libexec/qemu-kvm -name guest=josgutie-rhle86,debug-threads=on -S -object {"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain-64-josgutie-rhle86/master-key.aes"} -blockdev {"driver":"file","filename":"/usr/share/OVMF/OVMF_CODE.secboot.fd","node-name":"libvirt-pflash0-storage","auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-pflash0-format","read-only":true,"driver":"raw","file":"libvirt-pflash0-storage"} -blockdev {"driver":"file","filename":"/var/lib/libvirt/qemu/nvram/b99d8772-7c69-4856-b05a-10ad07b6cbcb.fd","node-name":"libvirt-pflash1-storage","auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-pflash1-format","read-only":false,"driver":"raw","file":"libvirt-pflash1-storage"} -machine 
pc-q35-rhel8.6.0,usb=off,dump-guest-core=off,pflash0=libvirt-pflash0-format,pflash1=libvirt-pflash1-format -accel kvm -cpu Haswell-noTSX,spec-ctrl=on,ssbd=on,md-clear=on,vmx=on,vme=on,f16c=on,rdrand=on,hypervisor=on,arat=on,xsaveopt=on,abm=on -m size=1048576k,slots=16,maxmem=4194304k -overcommit mem-lock=off -smp 1,maxcpus=16,sockets=16,dies=1,cores=1,threads=1 -object {"qom-type":"iothread","id":"iothread1"} -object {"qom-type":"memory-backend-ram","id":"ram-node0","size":1073741824} -numa node,nodeid=0,cpus=0-15,memdev=ram-node0 -uuid b99d8772-7c69-4856-b05a-10ad07b6cbcb -smbios type=1,manufacturer=Red Hat,product=RHEL,version=8.6-0.1.el8,serial=4c4c4544-0031-5910-8033-b8c04f304432,uuid=b99d8772-7c69-4856-b05a-10ad07b6cbcb,sku=8.6.0,family=RHV -smbios type=2,manufacturer=Red Hat,product=RHEL-AV -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=45,server=on,wait=off -mon chardev=charmonitor,id=monitor,mode=control -rtc base=2022-07-12T09:37:07,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -device pcie-root-port,port=16,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=17,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-root-port,port=18,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=19,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x3 -device pcie-root-port,port=20,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x4 -device pcie-root-port,port=21,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x5 -device pcie-root-port,port=22,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x6 -device pcie-root-port,port=23,chassis=8,id=pci.8,bus=pcie.0,addr=0x2.0x7 -device pcie-root-port,port=24,chassis=9,id=pci.9,bus=pcie.0,multifunction=on,addr=0x3 -device pcie-root-port,port=25,chassis=10,id=pci.10,bus=pcie.0,addr=0x3.0x1 -device pcie-root-port,port=26,chassis=11,id=pci.11,bus=pcie.0,addr=0x3.0x2 -device 
pcie-root-port,port=27,chassis=12,id=pci.12,bus=pcie.0,addr=0x3.0x3 -device pcie-root-port,port=28,chassis=13,id=pci.13,bus=pcie.0,addr=0x3.0x4 -device pcie-root-port,port=29,chassis=14,id=pci.14,bus=pcie.0,addr=0x3.0x5 -device pcie-root-port,port=30,chassis=15,id=pci.15,bus=pcie.0,addr=0x3.0x6 -device pcie-root-port,port=31,chassis=16,id=pci.16,bus=pcie.0,addr=0x3.0x7 -device qemu-xhci,p2=8,p3=8,id=ua-4cf3cd60-4572-44ef-8cdb-6ce559cc16b9,bus=pci.4,addr=0x0 -device virtio-scsi-pci,iothread=iothread1,id=ua-39aa9927-645b-451a-9987-805474ef418b,bus=pci.3,addr=0x0 -device virtio-serial-pci,id=ua-0a837287-81ab-4e37-9a1a-ad98505de772,max_ports=16,bus=pci.2,addr=0x0 -device ide-cd,bus=ide.2,id=ua-f229b9ca-883b-43c7-a03c-29d302d2132c,werror=report,rerror=report -blockdev {"driver":"host_device","filename":"/rhev/data-center/mnt/blockSD/6814fac1-2e9a-4ab3-bbfe-02a14c2049fe/images/f6bbb2fa-36cf-46b0-b045-abc4b283e38e/0dde2932-14c7-4d11-96a5-b4220e2644b0","aio":"native","node-name":"libvirt-1-storage","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"} -blockdev {"node-name":"libvirt-1-format","read-only":false,"cache":{"direct":true,"no-flush":false},"driver":"qcow2","file":"libvirt-1-storage","backing":null} -device scsi-hd,bus=ua-39aa9927-645b-451a-9987-805474ef418b.0,channel=0,scsi-id=0,lun=0,device_id=f6bbb2fa-36cf-46b0-b045-abc4b283e38e,drive=libvirt-1-format,id=ua-f6bbb2fa-36cf-46b0-b045-abc4b283e38e,bootindex=1,write-cache=on,serial=f6bbb2fa-36cf-46b0-b045-abc4b283e38e,werror=stop,rerror=stop -netdev tap,fd=46,id=hostua-b88d180d-662b-42a1-bf01-f0b6a09d1324,vhost=on,vhostfd=48 -device virtio-net-pci,host_mtu=1500,netdev=hostua-b88d180d-662b-42a1-bf01-f0b6a09d1324,id=ua-b88d180d-662b-42a1-bf01-f0b6a09d1324,mac=00:1a:4a:16:01:27,bus=pci.1,addr=0x0 -chardev socket,id=charchannel0,fd=40,server=on,wait=off -device virtserialport,bus=ua-0a837287-81ab-4e37-9a1a-ad98505de772.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 
-chardev spicevmc,id=charchannel1,name=vdagent -device virtserialport,bus=ua-0a837287-81ab-4e37-9a1a-ad98505de772.0,nr=2,chardev=charchannel1,id=channel1,name=com.redhat.spice.0 -device usb-tablet,id=input0,bus=ua-4cf3cd60-4572-44ef-8cdb-6ce559cc16b9.0,port=1 -audiodev {"id":"audio1","driver":"spice"} -vnc 10.37.192.44:0,password=on,audiodev=audio1 -k en-us -spice port=5901,tls-port=5902,addr=10.37.192.44,x509-dir=/etc/pki/vdsm/libvirt-spice,tls-channel=main,tls-channel=display,tls-channel=inputs,tls-channel=cursor,tls-channel=playback,tls-channel=record,tls-channel=smartcard,tls-channel=usbredir,seamless-migration=on -device qxl-vga,id=ua-1721d469-709c-4a55-9c00-f84caa1bd380,ram_size=67108864,vram_size=33554432,vram64_size_mb=0,vgamem_mb=16,max_outputs=1,bus=pcie.0,addr=0x1 -incoming defer -device virtio-balloon-pci,id=ua-883315e9-2623-4966-a590-300ff4898e10,bus=pci.5,addr=0x0 -object {"qom-type":"rng-random","id":"objua-13e255d4-6c47-414a-a2d7-44152dc94ea9","filename":"/dev/urandom"} -device virtio-rng-pci,rng=objua-13e255d4-6c47-414a-a2d7-44152dc94ea9,id=ua-13e255d4-6c47-414a-a2d7-44152dc94ea9,bus=pci.6,addr=0x0 -device vmcoreinfo -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
Thank you for the additional information. That it happens with 4.18.0-372.9.1.el8.x86_64 kernel and not on RHEL 8.5 indicates it may be related to the other migration-related problems experienced recently. But that it happens also on RHEL 7.9 looks surprising to me. David, do you think this problem could be related to the already known migration bugs or should I file a separate platform bug?
Given that the bug is public, we will reference it in KCS. Can we please make sure comments are public whenever possible?
The kernel in the recent RHEL 8.6 update (kernel-4.18.0-372.26.1.el8_6) contains possibly related fixes. Does the problem still exist with the kernel updated on both the source and destination hosts?
This bug has low overall severity and is not going to be further verified by QE. If you believe special care is required, feel free to properly align relevant severity, flags and keywords to raise PM_Score or use one of the Bumps ('PrioBumpField', 'PrioBumpGSS', 'PrioBumpPM', 'PrioBumpQA') in Keywords to raise its PM_Score above the verification threshold (1000).