Description of problem: ovn-controller uses 100% CPU when it receives a lot of incorrect ARP packets. Version-Release number of selected component (if applicable): [root@dell-per730-19 ovn]# uname -a Linux dell-per730-19.rhts.eng.pek2.redhat.com 3.10.0-1060.el7.x86_64 #1 SMP Mon Jul 1 18:28:13 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux [root@dell-per730-19 ovn]# rpm -qa | grep openvswitch openvswitch-selinux-extra-policy-1.0-11.el7fdp.noarch openvswitch2.11-2.11.0-14.el7fdp.x86_64 [root@dell-per730-19 ovn]# rpm -qa | grep ovn ovn2.11-host-2.11.0-19.el7fdp.x86_64 ovn2.11-central-2.11.0-19.el7fdp.x86_64 ovn2.11-2.11.0-19.el7fdp.x86_64 [root@dell-per730-19 ovn]# How reproducible: every time. Steps to Reproduce: 1. Set up an OVN environment and add a lot of logical switch ports. 2. Connect a guest to a logical switch. 3. Send a lot of incorrect ARP packets with 0.0.0.0 as the "Target IP address". Actual results: ovn-controller uses 101% CPU. Script run in the guest: from scapy.all import * for x in range(1000): sendp(Ether(src="00:de:ad:01:00:01", dst="ff:ff:ff:ff:ff:ff")/ARP(op=1,hwsrc='00:de:ad:01:00:01',hwdst='00:00:00:00:00:00',psrc='172.16.102.11',pdst='0.0.0.0'),iface="eth1") [root@dell-per730-19 ~]# top top - 02:11:07 up 3 days, 4:07, 2 users, load average: 0.18, 0.06, 0.06 Tasks: 465 total, 2 running, 463 sleeping, 0 stopped, 0 zombie %Cpu(s): 3.8 us, 0.3 sy, 0.0 ni, 95.8 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem : 65708808 total, 56629192 free, 3094836 used, 5984780 buff/cache KiB Swap: 29241340 total, 29241340 free, 0 used. 
62099024 avail Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 11493 root 10 -10 285408 9848 1716 R 100.7 0.0 1:58.82 ovn-controller 11650 qemu 20 0 2974064 929068 11900 S 43.4 1.4 2:51.37 qemu-kvm 11419 openvsw+ 10 -10 2323772 97776 17916 S 10.9 0.1 1:43.32 ovs-vswitchd 10527 root 20 0 1555548 26604 14540 S 1.0 0.0 0:03.44 libvirtd 11819 qemu 20 0 2950492 614672 11904 S 1.0 0.9 1:37.65 qemu-kvm 5834 root 20 0 152752 5740 4408 S 0.7 0.0 0:02.07 sshd 6064 root 20 0 162312 2684 1600 R 0.7 0.0 1:16.23 top 12325 root 20 0 0 0 0 S 0.7 0.0 0:00.42 vhost-11650 13975 root 20 0 347304 6876 5160 S 0.7 0.0 0:00.17 virsh 9 root 20 0 0 0 0 S 0.3 0.0 0:52.33 rcu_sched 1443 root 20 0 22296 1960 996 S 0.3 0.0 3:28.58 irqbalance 3592 root 20 0 548544 9536 6784 S 0.3 0.0 0:24.74 NetworkManager 12048 root 20 0 0 0 0 S 0.3 0.0 0:00.19 kworker/8:1 12355 root 20 0 0 0 0 S 0.3 0.0 0:00.07 vhost-1181 [root@dell-per730-19 ovn]# cat /var/log/openvswitch/ovn-controller.log | grep CPU | tail -4 2019-07-15T06:11:07.320Z|00056|poll_loop|INFO|wakeup due to 0-ms timeout at ovn/controller/pinctrl.c:2489 (100% CPU usage) 2019-07-15T06:11:08.244Z|00058|poll_loop|INFO|wakeup due to 0-ms timeout at ovn/controller/pinctrl.c:2489 (100% CPU usage) 2019-07-15T06:11:14.249Z|00060|poll_loop|INFO|wakeup due to 0-ms timeout at ovn/controller/pinctrl.c:2489 (100% CPU usage) 2019-07-15T06:11:21.322Z|00062|poll_loop|INFO|wakeup due to [POLLIN] on fd 21 (20.0.0.25:48756<->20.0.0.25:6642) at lib/stream-fd.c:157 (99% CPU usage) [root@dell-per730-19 ovn]# Expected results: cpu usage is not high Additional info:
This bug has been verified as fixed on the latest version: [root@dell-per740-18 ovn]# uname -a Linux dell-per740-18.rhts.eng.pek2.redhat.com 3.10.0-1062.el7.x86_64 #1 SMP Thu Jul 18 20:25:13 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux [root@dell-per740-18 ovn]# rpm -qa | grep openvswitch openvswitch-selinux-extra-policy-1.0-14.el7fdp.noarch openvswitch2.11-2.11.0-26.el7fdp.x86_64 kernel-kernel-networking-openvswitch-ovn_ha-1.0-41.noarch [root@dell-per740-18 ovn]# rpm -qa | grep ovn ovn2.11-2.11.1-8.el7fdp.x86_64 ovn2.11-central-2.11.1-8.el7fdp.x86_64 ovn2.11-host-2.11.1-8.el7fdp.x86_64 kernel-kernel-networking-openvswitch-ovn_ha-1.0-41.noarch [root@dell-per740-18 ovn]# [root@dell-per740-18 ovn]# ovn-nbctl show switch 086e87ca-1eee-4440-9790-6f0d7859360d (s3) port hv0_vm00_vnet1 addresses: ["00:de:ad:00:00:01 172.16.103.11"] port hv0_vm01_vnet1 addresses: ["00:de:ad:00:01:01 172.16.103.12"] port s3_r1 type: router addresses: ["00:de:ad:ff:01:03 172.16.103.1"] router-port: r1_s3 switch ecc8b593-19fe-4509-8590-a68edcd2185d (public) port ln_p1 type: localnet addresses: ["unknown"] port public_r1 type: router router-port: r1_public switch 3827320f-2e1f-4ad2-9394-cfef23e086dc (s2) port s2_r1 type: router addresses: ["00:de:ad:ff:01:02 172.16.102.1"] router-port: r1_s2 port hv1_vm01_vnet1 addresses: ["00:de:ad:01:01:01 172.16.102.12"] port hv1_vm00_vnet1 addresses: ["00:de:ad:01:00:01 172.16.102.11"] router 5b2f265f-abdd-4f82-b57f-a45ed441f52d (r1) port r1_s3 mac: "00:de:ad:ff:01:03" networks: ["172.16.103.1/24"] port r1_public mac: "40:44:00:00:00:03" networks: ["172.16.104.1/24"] port r1_s2 mac: "00:de:ad:ff:01:02" networks: ["172.16.102.1/24"] nat 599e4d31-0fba-4af3-8dc5-cca7adea2b42 external ip: "172.16.104.200" logical ip: "172.16.102.11" type: "dnat_and_snat" nat de0cfce7-68c6-4612-bdf8-4eaa5299e6c8 external ip: "172.16.104.201" logical ip: "172.16.103.11" type: "dnat_and_snat" [root@dell-per740-18 ovn]# Script run in the guest: from scapy.all import * for x in range(1000): 
sendp(Ether(src="00:de:ad:01:00:01", dst="ff:ff:ff:ff:ff:ff")/ARP(op=1,hwsrc='00:de:ad:01:00:01',hwdst='00:00:00:00:00:00',psrc='172.16.102.11',pdst='0.0.0.0'),iface="eth1") [root@dell-per740-18 ~]# top top - 23:40:53 up 1 day, 19:50, 2 users, load average: 0.61, 0.16, 0.08 Tasks: 533 total, 1 running, 532 sleeping, 0 stopped, 0 zombie %Cpu(s): 0.1 us, 0.1 sy, 0.0 ni, 99.9 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem : 65280816 total, 56768988 free, 2849836 used, 5661992 buff/cache KiB Swap: 32767996 total, 32767996 free, 0 used. 61932168 avail Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 28062 openvsw+ 10 -10 3413632 141724 17936 S 3.0 0.2 36:41.50 ovs-vswitchd 28503 qemu 20 0 2920808 697496 10248 S 1.0 1.1 3:01.08 qemu-kvm 1525 root 20 0 21884 1620 996 S 0.3 0.0 2:20.03 irqbalance 25468 root 20 0 102896 5528 3444 S 0.3 0.0 0:00.27 dhclient 28148 root 10 -10 281840 10420 1820 S 0.3 0.0 0:21.79 ovn-controller 28333 qemu 20 0 2920804 776872 10252 S 0.3 1.2 4:00.01 qemu-kvm 87543 root 20 0 162456 2740 1580 R 0.3 0.0 0:00.11 top 1 root 20 0 193924 7064 4216 S 0.0 0.0 0:10.96 systemd 2 root 20 0 0 0 0 S 0.0 0.0 0:00.06 kthreadd
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2019:3718