Description of problem: qemu core dumped after repeatedly hotplug/hotunplug macvtap device. qemu ouput:(qemu) qemu-kvm: util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Version-Release number of selected component (if applicable): kernel-4.18.0-215.el8.x86_64 qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64 How reproducible: 100% Steps to Reproduce: 1.Create a macvtap device. # ip link add link switch name macvtap0 type macvtap mode bridge # ip link set brg_macvtap0 up # # ip -d link show macvtap0 14: macvtap0@switch: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 500 link/ether da:fa:c6:08:37:14 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 macvtap mode bridge addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 2.Boot a rhel8.3 guest qemu cli: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35 \ -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \ -device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x2 \ -m 8G \ -smp 16,maxcpus=16,cores=8,threads=1,dies=1,sockets=2 \ -cpu 'Haswell-noTSX',+kvm_pv_unhalt \ -device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \ -device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \ -blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/rhel.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot 
menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -monitor stdio \ -device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \ -qmp tcp:0:5555,server,nowait \ -device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \ 9<>/dev/tap14 \ 3.Hot plug macvtap device, then hot unplug it. {"execute":"qmp_capabilities"} {"return": {}} {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} {"return": {}} {"execute":"device_add","arguments":{"driver":"virtio-net-pci","netdev":"hostnet0","mac":"fe:ec:d3:f9:a4:dd","id":"net0","bus":"pcie-root-port-3"}} {"return": {}} {"timestamp": {"seconds": 1592379851, "microseconds": 662658}, "event": "NIC_RX_FILTER_CHANGED", "data": {"name": "net0", "path": "/machine/peripheral/net0/virtio-backend"}} {"execute":"device_del","arguments":{"id":"net0"}} {"return": {}} {"timestamp": {"seconds": 1592379908, "microseconds": 811989}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/net0/virtio-backend"}} {"timestamp": {"seconds": 1592379908, "microseconds": 871122}, "event": "DEVICE_DELETED", "data": {"device": "net0", "path": "/machine/peripheral/net0"}} {"execute":"netdev_del","arguments":{"id":"hostnet0"}} {"return": {}} 4.Hot plug this macvtap device again; qemu core dumps. {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} Actual results: qemu core dumped Expected results: qemu does not crash and the guest keeps working well Additional info: 1.rhel 8.2.1-av has the same issue. Test version: qemu-kvm-4.2.0-25.module+el8.2.1+6985+9fd9d514.x86_64 2. If we follow the procedure below, qemu does not core dump, but the guest cannot obtain an IP address. ==>steps Hot plug --> Hot unplug --> reboot guest --> Hot plug 3. core dump info: http://fileshare.englab.nay.redhat.com/pub/section2/coredump/var/crash/leiyang/macvtap/
Additional info: # gdb core.qemu-kvm.5.0.0-0.module+el8.3.0-info (gdb) bt full #0 0x00007eff1a5d17ff in raise () at /lib64/libc.so.6 #1 0x00007eff1a5bbc35 in abort () at /lib64/libc.so.6 #2 0x00007eff1a5bbb09 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 #3 0x00007eff1a5c9de6 in .annobin_assert.c_end () at /lib64/libc.so.6 #4 0x0000560ecccb7f41 in qemu_set_nonblock (fd=fd@entry=9) at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/util/oslib-posix.c:251 f = <optimized out> __PRETTY_FUNCTION__ = "qemu_set_nonblock" #5 0x0000560eccba1189 in net_init_tap (netdev=0x7ffda14b1530, name=0x560ece63ce30 "hostnet0", peer=0x0, errp=0x7ffda14b1528) at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/net/tap.c:798 tap = 0x7ffda14b1540 fd = 9 --Type <RET> for more, q to quit, c to continue without paging-- vnet_hdr = 0 i = 0 queues = 1 script = 0x0 downscript = 0x0 err = 0x0 vhostfdname = 0x0 ifname = "X\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000x\023K\241\375\177\000\000\377\377\377\377\000\000\000\000\000@\000\000\000\000\000\000\347ս\314\001\000\000\000\377\377\377\377\377\377\377\377\004\000\000\000\000\000\000\000\200\310[\316\016V\000\000@\366\016\341\374~\000\000\006\000\000\000\000\000\000\000\t)\t\037\377~\000\000\252\233\343\314\000\000\000\000\000\000\000\000\001\000\000\000\020\000\000\000\375\177\000\000\360t]\316\016V\000" __PRETTY_FUNCTION__ = "net_init_tap" __func__ = "net_init_tap" #6 0x0000560eccb91078 in net_client_init1 (object=0x7ffda14b1530, is_netdev=<optimized out>, errp=0x7ffda14b1528) at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/net/net.c:1055 legacy = {id = 0x0, type = NET_CLIENT_DRIVER_NONE, u = {nic = {has_netdev = false, netdev = 0x0, has_macaddr = false, macaddr = 0x0, has_model = false, model = 0x0, has_addr = false, addr = 0x0, has_vectors = false, vectors = 0}, user = {has_hostname = false, hostname = 0x0, has_q_restrict = false, q_restrict = false, has_ipv4 = 
false, ipv4 = false, has_ipv6 = false, ipv6 = false, has_ip = false, ip = 0x0, has_net = false, net = 0x0, has_host = false, host = 0x0, has_tftp = false, tftp = 0x0, has_bootfile = false, bootfile = 0x0, has_dhcpstart = false, dhcpstart = 0x0, has_dns = false, dns = 0x0, has_dnssearch = false, dnssearch = 0x0, has_domainname = false, domainname = 0x0, has_ipv6_prefix = false, ipv6_prefix = 0x0, has_ipv6_prefixlen = false, ipv6_prefixlen = 0, has_ipv6_host = false, ipv6_host = 0x0, has_ipv6_dns = false, ipv6_dns = 0x0, has_smb = false, smb = 0x0, has_smbserver = false, smbserver = 0x0, has_hostfwd = false, hostfwd = 0x0, has_guestfwd = false, guestfwd = 0x0, has_tftp_server_name = false, tftp_server_name = 0x0}, tap = {has_ifname = false, ifname = 0x0, has_fd = false, fd = 0x0, has_fds = false, fds = 0x0, has_script = false, script = 0x0, has_downscript = false, downscript = 0x0, has_br = false, br = 0x0, has_helper = false, helper = 0x0, has_sndbuf = false, sndbuf = 0, has_vnet_hdr = false, vnet_hdr = false, has_vhost = false, vhost = false, has_vhostfd = false, vhostfd = 0x0, has_vhostfds = false, vhostfds = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0, has_poll_us = false, poll_us = 0}, l2tpv3 = {src = 0x0, dst = 0x0, has_srcport = false, srcport = 0x0, has--Type <RET> for more, q to quit, c to continue without paging-- _dstport = false, dstport = 0x0, has_ipv6 = false, ipv6 = false, has_udp = false, udp = false, has_cookie64 = false, cookie64 = false, has_counter = false, counter = false, has_pincounter = false, pincounter = false, has_txcookie = false, txcookie = 0, has_rxcookie = false, rxcookie = 0, txsession = 0, has_rxsession = false, rxsession = 0, has_offset = false, offset = 0}, socket = {has_fd = false, fd = 0x0, has_listen = false, listen = 0x0, has_connect = false, connect = 0x0, has_mcast = false, mcast = 0x0, has_localaddr = false, localaddr = 0x0, has_udp = false, udp = 0x0}, vde = {has_sock = false, sock = 0x0, 
has_port = false, port = 0, has_group = false, group = 0x0, has_mode = false, mode = 0}, bridge = {has_br = false, br = 0x0, has_helper = false, helper = 0x0}, hubport = {hubid = 0, has_netdev = false, netdev = 0x0}, netmap = {ifname = 0x0, has_devname = false, devname = 0x0}, vhost_user = {chardev = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0}}} netdev = 0x7ffda14b1530 name = <optimized out> peer = 0x0 __func__ = "net_client_init1" __PRETTY_FUNCTION__ = "net_client_init1" #7 0x0000560eccbaeec9 in qmp_marshal_netdev_add (args=<optimized out>, ret=<optimized out>, errp=0x7ffda14b16c8) at qapi/qapi-commands-net.c:76 err = 0x0 v = 0x560ecee2af20 arg = {id = 0x560ece63ce30 "hostnet0", type = NET_CLIENT_DRIVER_TAP, u = {nic = {has_netdev = false, netdev = 0x0, has_macaddr = true, macaddr = 0x560ececb7de0 "9", has_model = false, model = 0x0, has_addr = false, addr = 0x0, has_vectors = false, vectors = 0}, user = {has_hostname = false, hostname = 0x0, has_q_restrict = true, q_restrict = false, has_ipv4 = false, ipv4 = false, has_ipv6 = false, ipv6 = false, has_ip = false, ip = 0x560ececb7de0 "9", has_net = false, net = 0x0, has_host = false, host = 0x0, has_tftp = false, tftp = 0x0, has_bootfile = false, bootfile = 0x0, has_dhcpstart = false, dhcpstart = 0x0, has_dns = false, dns = 0x0, has_dnssearch = false, dnssearch = 0x0, has_domainname = false, domainname = 0x0, has_ipv6_prefix = false, ipv6_prefix = 0x0, has_ipv6_prefixlen = false, ipv6_prefixlen = 0, has_ipv6_host = false, ipv6_host = 0x0, has_ipv6_dns = false, ipv6_dns = 0x0, has_smb = false, smb = 0x0, has_smbserver = false, smbserver = 0x0, has_hostfwd = false, hostfwd = 0x0, has_guestfwd = false, guestfwd = 0x0, has_tftp_server_name = false, tftp_server_name = 0x0}, tap = {has_ifname = false, ifname = 0x0, has_fd = true, fd = 0x560ececb7de0 "9", has_fds = false, fds = 0x0, has_script = false, script = 0x0, has_downscript = false, downscript = 0x0, has_br = false, br = 0x0, 
has_helper = false, helper = 0x0, has_sndbuf = false, sndbuf = 0, has_vnet_hdr =--Type <RET> for more, q to quit, c to continue without paging-- false, vnet_hdr = false, has_vhost = true, vhost = true, has_vhostfd = false, vhostfd = 0x0, has_vhostfds = false, vhostfds = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0, has_poll_us = false, poll_us = 0}, l2tpv3 = {src = 0x0, dst = 0x0, has_srcport = true, srcport = 0x560ececb7de0 "9", has_dstport = false, dstport = 0x0, has_ipv6 = false, ipv6 = false, has_udp = false, udp = false, has_cookie64 = false, cookie64 = false, has_counter = false, counter = false, has_pincounter = false, pincounter = false, has_txcookie = false, txcookie = 0, has_rxcookie = false, rxcookie = 0, txsession = 0, has_rxsession = false, rxsession = 0, has_offset = false, offset = 0}, socket = {has_fd = false, fd = 0x0, has_listen = true, listen = 0x560ececb7de0 "9", has_connect = false, connect = 0x0, has_mcast = false, mcast = 0x0, has_localaddr = false, localaddr = 0x0, has_udp = false, udp = 0x0}, vde = {has_sock = false, sock = 0x0, has_port = true, port = 0, has_group = false, group = 0x560ececb7de0 "9", has_mode = false, mode = 0}, bridge = {has_br = false, br = 0x0, has_helper = true, helper = 0x560ececb7de0 "9"}, hubport = {hubid = 0, has_netdev = false, netdev = 0x0}, netmap = {ifname = 0x0, has_devname = false, devname = 0x1 <error: Cannot access memory at address 0x1>}, vhost_user = {chardev = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 1}}} #8 0x0000560eccc69d70 in qmp_dispatch (cmds=0x560ecd4f7540 <qmp_commands>, request=<optimized out>, allow_oob=<optimized out>)
I think this happens because in tap_cleanup() the fd (9) is closed on netdev_del, and you can't reuse it with netdev_add.
This is another way to reproduce the bug described in BZ 1708076.
Perhaps in this case we could avoid crashing a running system. This change should fix the problem: diff --git a/net/tap.c b/net/tap.c index ca48f2a285..f1bb1b99e8 100644 --- a/net/tap.c +++ b/net/tap.c @@ -795,6 +795,11 @@ int net_init_tap(const Netdev *netdev, const char *name, error_propagate(errp, err); return -1; } + /* Check if fd is valid */ + if (fcntl(fd, F_GETFD) == -1) { + error_setg(errp, "Invalid file descriptor"); + return -1; + } qemu_set_nonblock(fd);
Sent a new series upstream to fix the problem: [PATCH v2 0/2] net: tap: check file descriptor can be used https://patchew.org/QEMU/20200630145737.232095-1-lvivier@redhat.com/
Merged upstream in 5.1.0-rc1 e7b347d0bf64 net: detect errors from probing vnet hdr flag for TAP devices https://github.com/qemu/qemu/commit/e7b347d0bf640adb1c998d317eaf44d2d7cbd973 894022e61601 net: check if the file descriptor is valid before using it https://github.com/qemu/qemu/commit/894022e616016fe81745753f14adfbd680a1c7ee
*** Bug 1708076 has been marked as a duplicate of this bug. ***
===Verified with qemu-kvm-5.1.0-2.module+el8.3.0+7652+b30e6901.x86_64 1.Create a macvtap device. # ip link add link switch name macvtap0 type macvtap mode bridge # ip link set macvtap0 up # ip -d link show macvtap0 11: macvtap0@switch: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 500 link/ether c2:8f:97:03:9c:1e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 macvtap mode bridge addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 2.Boot a rhel8.3 guest qemu cli: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35 \ -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \ -device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x2 \ -m 8G \ -smp 16,maxcpus=16,cores=8,threads=1,dies=1,sockets=2 \ -cpu 'Haswell-noTSX',+kvm_pv_unhalt \ -device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \ -device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \ -blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/rhel830-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -monitor stdio \ -device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \ -qmp tcp:0:5555,server,nowait \ -device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \ 9<>/dev/tap11 \ 2.Hot plug 
macvtap device, then hot unplug it. {"execute":"qmp_capabilities"} {"return": {}} {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} {"return": {}} {"execute":"device_add","arguments":{"driver":"virtio-net-pci","netdev":"hostnet0","mac":"c2:8f:97:03:9c:1e","id":"net0","bus":"pcie-root-port-3"}} {"return": {}} {"timestamp": {"seconds": 1598257197, "microseconds": 458232}, "event": "NIC_RX_FILTER_CHANGED", "data": {"name": "net0", "path": "/machine/peripheral/net0/virtio-backend"}} {"execute":"device_del","arguments":{"id":"net0"}} {"return": {}} {"timestamp": {"seconds": 1598257282, "microseconds": 276488}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/net0/virtio-backend"}} {"timestamp": {"seconds": 1598257282, "microseconds": 332422}, "event": "DEVICE_DELETED", "data": {"device": "net0", "path": "/machine/peripheral/net0"}} {"execute":"netdev_del","arguments":{"id":"hostnet0"}} {"return": {}} 3.Hot plug this macvtap device again; qemu now rejects the closed file descriptor with an error instead of core dumping. {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} {"error": {"class": "GenericError", "desc": "hostnet0: Can't use file descriptor 9: Bad file descriptor"}} 4.QEMU does not core dump, so this bug has been fixed. Moving to 'VERIFIED'.
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (virt:8.3 bug fix and enhancement update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:5137