Bug 1848274
| Summary: | qemu core dumped after repeatedly hotplug/hotunplug macvtap device | ||
|---|---|---|---|
| Product: | Red Hat Enterprise Linux Advanced Virtualization | Reporter: | Lei Yang <leiyang> |
| Component: | qemu-kvm | Assignee: | Laurent Vivier <lvivier> |
| qemu-kvm sub component: | Networking | QA Contact: | Lei Yang <leiyang> |
| Status: | CLOSED ERRATA | Docs Contact: | |
| Severity: | medium | ||
| Priority: | medium | CC: | aadam, chayang, coli, ddepaula, jinzhao, juzhang, virt-maint |
| Version: | 8.3 | Keywords: | Triaged |
| Target Milestone: | rc | Flags: | pm-rhel:
mirror+
|
| Target Release: | 8.3 | ||
| Hardware: | Unspecified | ||
| OS: | Unspecified | ||
| Whiteboard: | |||
| Fixed In Version: | qemu-kvm-5.1.0-2.module+el8.3.0+7652+b30e6901 | Doc Type: | If docs needed, set a value |
| Doc Text: | Story Points: | --- | |
| Clone Of: | Environment: | ||
| Last Closed: | 2020-11-17 17:49:16 UTC | Type: | Bug |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
| Embargoed: | |||
Additional info:
# gdb core.qemu-kvm.5.0.0-0.module+el8.3.0-info
(gdb) bt full
#0 0x00007eff1a5d17ff in raise () at /lib64/libc.so.6
#1 0x00007eff1a5bbc35 in abort () at /lib64/libc.so.6
#2 0x00007eff1a5bbb09 in _nl_load_domain.cold.0 () at /lib64/libc.so.6
#3 0x00007eff1a5c9de6 in .annobin_assert.c_end () at /lib64/libc.so.6
#4 0x0000560ecccb7f41 in qemu_set_nonblock (fd=fd@entry=9)
at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/util/oslib-posix.c:251
f = <optimized out>
__PRETTY_FUNCTION__ = "qemu_set_nonblock"
#5 0x0000560eccba1189 in net_init_tap
(netdev=0x7ffda14b1530, name=0x560ece63ce30 "hostnet0", peer=0x0, errp=0x7ffda14b1528)
at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/net/tap.c:798
tap = 0x7ffda14b1540
fd = 9
--Type <RET> for more, q to quit, c to continue without paging--
vnet_hdr = 0
i = 0
queues = 1
script = 0x0
downscript = 0x0
err = 0x0
vhostfdname = 0x0
ifname = "X\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000x\023K\241\375\177\000\000\377\377\377\377\000\000\000\000\000@\000\000\000\000\000\000\347ս\314\001\000\000\000\377\377\377\377\377\377\377\377\004\000\000\000\000\000\000\000\200\310[\316\016V\000\000@\366\016\341\374~\000\000\006\000\000\000\000\000\000\000\t)\t\037\377~\000\000\252\233\343\314\000\000\000\000\000\000\000\000\001\000\000\000\020\000\000\000\375\177\000\000\360t]\316\016V\000"
__PRETTY_FUNCTION__ = "net_init_tap"
__func__ = "net_init_tap"
#6 0x0000560eccb91078 in net_client_init1 (object=0x7ffda14b1530, is_netdev=<optimized out>, errp=0x7ffda14b1528)
at /usr/src/debug/qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64/net/net.c:1055
legacy =
{id = 0x0, type = NET_CLIENT_DRIVER_NONE, u = {nic = {has_netdev = false, netdev = 0x0, has_macaddr = false, macaddr = 0x0, has_model = false, model = 0x0, has_addr = false, addr = 0x0, has_vectors = false, vectors = 0}, user = {has_hostname = false, hostname = 0x0, has_q_restrict = false, q_restrict = false, has_ipv4 = false, ipv4 = false, has_ipv6 = false, ipv6 = false, has_ip = false, ip = 0x0, has_net = false, net = 0x0, has_host = false, host = 0x0, has_tftp = false, tftp = 0x0, has_bootfile = false, bootfile = 0x0, has_dhcpstart = false, dhcpstart = 0x0, has_dns = false, dns = 0x0, has_dnssearch = false, dnssearch = 0x0, has_domainname = false, domainname = 0x0, has_ipv6_prefix = false, ipv6_prefix = 0x0, has_ipv6_prefixlen = false, ipv6_prefixlen = 0, has_ipv6_host = false, ipv6_host = 0x0, has_ipv6_dns = false, ipv6_dns = 0x0, has_smb = false, smb = 0x0, has_smbserver = false, smbserver = 0x0, has_hostfwd = false, hostfwd = 0x0, has_guestfwd = false, guestfwd = 0x0, has_tftp_server_name = false, tftp_server_name = 0x0}, tap = {has_ifname = false, ifname = 0x0, has_fd = false, fd = 0x0, has_fds = false, fds = 0x0, has_script = false, script = 0x0, has_downscript = false, downscript = 0x0, has_br = false, br = 0x0, has_helper = false, helper = 0x0, has_sndbuf = false, sndbuf = 0, has_vnet_hdr = false, vnet_hdr = false, has_vhost = false, vhost = false, has_vhostfd = false, vhostfd = 0x0, has_vhostfds = false, vhostfds = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0, has_poll_us = false, poll_us = 0}, l2tpv3 = {src = 0x0, dst = 0x0, has_srcport = false, srcport = 0x0, has--Type <RET> for more, q to quit, c to continue without paging--
_dstport = false, dstport = 0x0, has_ipv6 = false, ipv6 = false, has_udp = false, udp = false, has_cookie64 = false, cookie64 = false, has_counter = false, counter = false, has_pincounter = false, pincounter = false, has_txcookie = false, txcookie = 0, has_rxcookie = false, rxcookie = 0, txsession = 0, has_rxsession = false, rxsession = 0, has_offset = false, offset = 0}, socket = {has_fd = false, fd = 0x0, has_listen = false, listen = 0x0, has_connect = false, connect = 0x0, has_mcast = false, mcast = 0x0, has_localaddr = false, localaddr = 0x0, has_udp = false, udp = 0x0}, vde = {has_sock = false, sock = 0x0, has_port = false, port = 0, has_group = false, group = 0x0, has_mode = false, mode = 0}, bridge = {has_br = false, br = 0x0, has_helper = false, helper = 0x0}, hubport = {hubid = 0, has_netdev = false, netdev = 0x0}, netmap = {ifname = 0x0, has_devname = false, devname = 0x0}, vhost_user = {chardev = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0}}}
netdev = 0x7ffda14b1530
name = <optimized out>
peer = 0x0
__func__ = "net_client_init1"
__PRETTY_FUNCTION__ = "net_client_init1"
#7 0x0000560eccbaeec9 in qmp_marshal_netdev_add (args=<optimized out>, ret=<optimized out>, errp=0x7ffda14b16c8)
at qapi/qapi-commands-net.c:76
err = 0x0
v = 0x560ecee2af20
arg =
{id = 0x560ece63ce30 "hostnet0", type = NET_CLIENT_DRIVER_TAP, u = {nic = {has_netdev = false, netdev = 0x0, has_macaddr = true, macaddr = 0x560ececb7de0 "9", has_model = false, model = 0x0, has_addr = false, addr = 0x0, has_vectors = false, vectors = 0}, user = {has_hostname = false, hostname = 0x0, has_q_restrict = true, q_restrict = false, has_ipv4 = false, ipv4 = false, has_ipv6 = false, ipv6 = false, has_ip = false, ip = 0x560ececb7de0 "9", has_net = false, net = 0x0, has_host = false, host = 0x0, has_tftp = false, tftp = 0x0, has_bootfile = false, bootfile = 0x0, has_dhcpstart = false, dhcpstart = 0x0, has_dns = false, dns = 0x0, has_dnssearch = false, dnssearch = 0x0, has_domainname = false, domainname = 0x0, has_ipv6_prefix = false, ipv6_prefix = 0x0, has_ipv6_prefixlen = false, ipv6_prefixlen = 0, has_ipv6_host = false, ipv6_host = 0x0, has_ipv6_dns = false, ipv6_dns = 0x0, has_smb = false, smb = 0x0, has_smbserver = false, smbserver = 0x0, has_hostfwd = false, hostfwd = 0x0, has_guestfwd = false, guestfwd = 0x0, has_tftp_server_name = false, tftp_server_name = 0x0}, tap = {has_ifname = false, ifname = 0x0, has_fd = true, fd = 0x560ececb7de0 "9", has_fds = false, fds = 0x0, has_script = false, script = 0x0, has_downscript = false, downscript = 0x0, has_br = false, br = 0x0, has_helper = false, helper = 0x0, has_sndbuf = false, sndbuf = 0, has_vnet_hdr =--Type <RET> for more, q to quit, c to continue without paging--
false, vnet_hdr = false, has_vhost = true, vhost = true, has_vhostfd = false, vhostfd = 0x0, has_vhostfds = false, vhostfds = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 0, has_poll_us = false, poll_us = 0}, l2tpv3 = {src = 0x0, dst = 0x0, has_srcport = true, srcport = 0x560ececb7de0 "9", has_dstport = false, dstport = 0x0, has_ipv6 = false, ipv6 = false, has_udp = false, udp = false, has_cookie64 = false, cookie64 = false, has_counter = false, counter = false, has_pincounter = false, pincounter = false, has_txcookie = false, txcookie = 0, has_rxcookie = false, rxcookie = 0, txsession = 0, has_rxsession = false, rxsession = 0, has_offset = false, offset = 0}, socket = {has_fd = false, fd = 0x0, has_listen = true, listen = 0x560ececb7de0 "9", has_connect = false, connect = 0x0, has_mcast = false, mcast = 0x0, has_localaddr = false, localaddr = 0x0, has_udp = false, udp = 0x0}, vde = {has_sock = false, sock = 0x0, has_port = true, port = 0, has_group = false, group = 0x560ececb7de0 "9", has_mode = false, mode = 0}, bridge = {has_br = false, br = 0x0, has_helper = true, helper = 0x560ececb7de0 "9"}, hubport = {hubid = 0, has_netdev = false, netdev = 0x0}, netmap = {ifname = 0x0, has_devname = false, devname = 0x1 <error: Cannot access memory at address 0x1>}, vhost_user = {chardev = 0x0, has_vhostforce = false, vhostforce = false, has_queues = false, queues = 1}}}
#8 0x0000560eccc69d70 in qmp_dispatch
(cmds=0x560ecd4f7540 <qmp_commands>, request=<optimized out>, allow_oob=<optimized out>)
I think this happens because tap_cleanup() closes the fd (9) on netdev_del, so it cannot be reused by a later netdev_add. This is another way to reproduce the bug described in BZ 1708076. In this case we should be able to avoid crashing a running system.
This change should fix the problem:
diff --git a/net/tap.c b/net/tap.c
index ca48f2a285..f1bb1b99e8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -795,6 +795,11 @@ int net_init_tap(const Netdev *netdev, const char *name,
error_propagate(errp, err);
return -1;
}
+ /* Check if fd is valid */
+ if (fcntl(fd, F_GETFD) == -1) {
+ error_setg(errp, "Invalid file descriptor");
+ return -1;
+ }
qemu_set_nonblock(fd);
Sent a new series upstream to fix the problem: "[PATCH v2 0/2] net: tap: check file descriptor can be used", https://patchew.org/QEMU/20200630145737.232095-1-lvivier@redhat.com/ . Merged upstream in 5.1.0-rc1:
e7b347d0bf64 net: detect errors from probing vnet hdr flag for TAP devices
https://github.com/qemu/qemu/commit/e7b347d0bf640adb1c998d317eaf44d2d7cbd973
894022e61601 net: check if the file descriptor is valid before using it
https://github.com/qemu/qemu/commit/894022e616016fe81745753f14adfbd680a1c7ee
*** Bug 1708076 has been marked as a duplicate of this bug. *** ===Verified with qemu-kvm-5.1.0-2.module+el8.3.0+7652+b30e6901.x86_64
1.Create a macvtap device.
# ip link add link switch name macvtap0 type macvtap mode bridge
# ip link set macvtap0 up
# ip -d link show macvtap0
11: macvtap0@switch: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 500
link/ether c2:8f:97:03:9c:1e brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535
macvtap mode bridge addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
2.Boot a rhel8.3 guest
qemu cli:
/usr/libexec/qemu-kvm \
-name 'avocado-vt-vm1' \
-sandbox on \
-machine q35 \
-device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \
-device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \
-nodefaults \
-device VGA,bus=pcie.0,addr=0x2 \
-m 8G \
-smp 16,maxcpus=16,cores=8,threads=1,dies=1,sockets=2 \
-cpu 'Haswell-noTSX',+kvm_pv_unhalt \
-device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \
-device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \
-device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \
-device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \
-device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \
-blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/rhel830-64-virtio-scsi.qcow2,cache.direct=on,cache.no-flush=off \
-blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \
-device scsi-hd,id=image1,drive=drive_image1,write-cache=on \
-vnc :0 \
-rtc base=utc,clock=host,driftfix=slew \
-boot menu=off,order=cdn,once=c,strict=off \
-enable-kvm \
-monitor stdio \
-device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \
-qmp tcp:0:5555,server,nowait \
-device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \
9<>/dev/tap11 \
2.Hot plug macvtap device,then hot unplug it.
{"execute":"qmp_capabilities"}
{"return": {}}
{"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}}
{"return": {}}
{"execute":"device_add","arguments":{"driver":"virtio-net-pci","netdev":"hostnet0","mac":"c2:8f:97:03:9c:1e","id":"net0","bus":"pcie-root-port-3"}}
{"return": {}}
{"timestamp": {"seconds": 1598257197, "microseconds": 458232}, "event": "NIC_RX_FILTER_CHANGED", "data": {"name": "net0", "path": "/machine/peripheral/net0/virtio-backend"}}
{"execute":"device_del","arguments":{"id":"net0"}}
{"return": {}}
{"timestamp": {"seconds": 1598257282, "microseconds": 276488}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/net0/virtio-backend"}}
{"timestamp": {"seconds": 1598257282, "microseconds": 332422}, "event": "DEVICE_DELETED", "data": {"device": "net0", "path": "/machine/peripheral/net0"}}
{"execute":"netdev_del","arguments":{"id":"hostnet0"}}
{"return": {}}
3.Hot plug this macvtap device again (this step previously made qemu core dump).
{"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}}
{"error": {"class": "GenericError", "desc": "hostnet0: Can't use file descriptor 9: Bad file descriptor"}}
4.qemu does not core dump; netdev_add returns an error instead.
So this bug has been fixed very well. Move to 'VERIFIED'.
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (virt:8.3 bug fix and enhancement update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:5137 |
Description of problem: qemu core dumped after repeatedly hotplugging/hot-unplugging a macvtap device. qemu output: (qemu) qemu-kvm: util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed. Version-Release number of selected component (if applicable): kernel-4.18.0-215.el8.x86_64 qemu-kvm-5.0.0-0.module+el8.3.0+6620+5d5e1420.x86_64 How reproducible: 100% Steps to Reproduce: 1.Create a macvtap device. # ip link add link switch name macvtap0 type macvtap mode bridge # ip link set brg_macvtap0 up # # ip -d link show macvtap0 14: macvtap0@switch: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 500 link/ether da:fa:c6:08:37:14 brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 65535 macvtap mode bridge addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 2.Boot a rhel8.3 guest qemu cli: /usr/libexec/qemu-kvm \ -name 'avocado-vt-vm1' \ -sandbox on \ -machine q35 \ -device pcie-root-port,id=pcie-root-port-0,multifunction=on,bus=pcie.0,addr=0x1,chassis=1 \ -device pcie-pci-bridge,id=pcie-pci-bridge-0,addr=0x0,bus=pcie-root-port-0 \ -nodefaults \ -device VGA,bus=pcie.0,addr=0x2 \ -m 8G \ -smp 16,maxcpus=16,cores=8,threads=1,dies=1,sockets=2 \ -cpu 'Haswell-noTSX',+kvm_pv_unhalt \ -device pcie-root-port,id=pcie-root-port-1,port=0x1,addr=0x1.0x1,bus=pcie.0,chassis=2 \ -device qemu-xhci,id=usb1,bus=pcie-root-port-1,addr=0x0 \ -device usb-tablet,id=usb-tablet1,bus=usb1.0,port=1 \ -device pcie-root-port,id=pcie-root-port-2,port=0x2,addr=0x1.0x2,bus=pcie.0,chassis=3 \ -device virtio-scsi-pci,id=virtio_scsi_pci0,bus=pcie-root-port-2,addr=0x0 \ -blockdev node-name=file_image1,driver=file,aio=threads,filename=/home/rhel.qcow2,cache.direct=on,cache.no-flush=off \ -blockdev node-name=drive_image1,driver=qcow2,cache.direct=on,cache.no-flush=off,file=file_image1 \ -device scsi-hd,id=image1,drive=drive_image1,write-cache=on \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot 
menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ -monitor stdio \ -device pcie-root-port,id=pcie_extra_root_port_0,multifunction=on,bus=pcie.0,addr=0x3,chassis=5 \ -qmp tcp:0:5555,server,nowait \ -device pcie-root-port,id=pcie-root-port-3,port=0x3,addr=0x1.0x3,bus=pcie.0,chassis=4 \ 9<>/dev/tap14 \ 2.Hot plug macvtap device,then hot unplug it. {"execute":"qmp_capabilities"} {"return": {}} {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} {"return": {}} {"execute":"device_add","arguments":{"driver":"virtio-net-pci","netdev":"hostnet0","mac":"fe:ec:d3:f9:a4:dd","id":"net0","bus":"pcie-root-port-3"}} {"return": {}} {"timestamp": {"seconds": 1592379851, "microseconds": 662658}, "event": "NIC_RX_FILTER_CHANGED", "data": {"name": "net0", "path": "/machine/peripheral/net0/virtio-backend"}} {"execute":"device_del","arguments":{"id":"net0"}} {"return": {}} {"timestamp": {"seconds": 1592379908, "microseconds": 811989}, "event": "DEVICE_DELETED", "data": {"path": "/machine/peripheral/net0/virtio-backend"}} {"timestamp": {"seconds": 1592379908, "microseconds": 871122}, "event": "DEVICE_DELETED", "data": {"device": "net0", "path": "/machine/peripheral/net0"}} {"execute":"netdev_del","arguments":{"id":"hostnet0"}} {"return": {}} 3.Hot plug this macvtap device again,qemu core dumped. {"execute":"netdev_add","arguments":{"type":"tap","id":"hostnet0","vhost": true,"fd":"9"}} Actual results: qemu core dumped Expected results: guest works well Additional info: 1.rhel 8.2.1-av has the same issue. Test version: qemu-kvm-4.2.0-25.module+el8.2.1+6985+9fd9d514.x86_64 2. If we follow the procedure below, qemu does not core dump, but the guest cannot obtain an IP address. ==>steps Hot plug --> Hot unplug --> reboot guest --> Hot plug 3. core dump info: http://fileshare.englab.nay.redhat.com/pub/section2/coredump/var/crash/leiyang/macvtap/