Tested with the following script:

ovn-nbctl set NB_GLOBAL . options:northd_probe_interval=180000
ovn-nbctl set connection . inactivity_probe=180000
ovs-vsctl set open . external_ids:ovn-openflow-probe-interval=180
ovs-vsctl set open . external_ids:ovn-remote-probe-interval=180000
ovn-sbctl set connection . inactivity_probe=180000

ovn-nbctl ls-add public
ovn-nbctl lsp-add public ln_p1
ovn-nbctl lsp-set-addresses ln_p1 unknown
ovn-nbctl lsp-set-type ln_p1 localnet
ovn-nbctl lsp-set-options ln_p1 network_name=nattest

i=1
for m in `seq 0 4`;do
  for n in `seq 1 99`;do
    ovn-nbctl lr-add r${i}
    ovn-nbctl lrp-add r${i} r${i}_public 00:de:ad:ff:$m:$n 172.16.$m.$n/16
    ovn-nbctl lrp-add r${i} r${i}_s${i} 00:de:ad:fe:$m:$n 173.$m.$n.1/24
    ovn-nbctl lr-nat-add r${i} dnat_and_snat 172.16.${m}.$((n+100)) 173.$m.$n.2
    ovn-nbctl set logical_router_port r${i}_public options:redirect-chassis=hv1

    # s1
    ovn-nbctl ls-add s${i}

    # s1 - r1
    ovn-nbctl lsp-add s${i} s${i}_r${i}
    ovn-nbctl lsp-set-type s${i}_r${i} router
    ovn-nbctl lsp-set-addresses s${i}_r${i} router
    ovn-nbctl lsp-set-options s${i}_r${i} router-port=r${i}_s${i}

    # s1 - vm1
    ovn-nbctl lsp-add s$i vm$i
    ovn-nbctl lsp-set-addresses vm$i "00:de:ad:01:$m:$n 173.$m.$n.2"

    ovn-nbctl lrp-add r$i r${i}_public 40:44:00:00:$m:$n 172.16.$m.$n/16
    ovn-nbctl lsp-add public public_r${i}
    ovn-nbctl lsp-set-type public_r${i} router
    ovn-nbctl lsp-set-addresses public_r${i} router
    ovn-nbctl lsp-set-options public_r${i} router-port=r${i}_public
    let i++
    if [ $i -gt 300 ];then
      break;
    fi
  done
  if [ $i -gt 300 ];then
    break;
  fi
done

#add host vm1
ip netns add vm1
ovs-vsctl add-port br-int vm1 -- set interface vm1 type=internal
ip link set vm1 netns vm1
ip netns exec vm1 ip link set vm1 address 00:de:ad:01:00:01
ip netns exec vm1 ip addr add 173.0.1.2/24 dev vm1
ip netns exec vm1 ip link set vm1 up
ovs-vsctl set Interface vm1 external_ids:iface-id=vm1

ip netns add vm2
ovs-vsctl add-port br-int vm2 -- set interface vm2 type=internal
ip link set vm2 netns vm2
ip netns exec vm2 ip link set vm2 address 00:de:ad:01:00:02
ip netns exec vm2 ip addr add 173.0.2.2/24 dev vm2
ip netns exec vm2 ip link set vm2 up
ovs-vsctl set Interface vm2 external_ids:iface-id=vm2

#set provider network
ovs-vsctl add-br nat_test
ip link set nat_test up
ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=nattest:nat_test

ip netns add vm0
ovs-vsctl add-port nat_test vm0 -- set interface vm0 type=internal
ip link set vm0 netns vm0
ip netns exec vm0 ip link set vm0 address 00:00:00:00:00:01
ip netns exec vm0 ip addr add 172.16.0.100/16 dev vm0
ip netns exec vm0 ip link set vm0 up
ovs-vsctl set Interface vm0 external_ids:iface-id=vm0

ip netns exec vm1 ip route add default via 173.0.1.1
ip netns exec vm2 ip route add default via 173.0.2.1

ovn-nbctl --wait=hv sync
sleep 30
ip netns exec vm1 ping 172.16.0.102 -c 1
ip netns exec vm1 ping 172.16.0.100 -c 1
sleep 30
systemctl stop ovn-northd
sleep 60
systemctl start ovn-northd
ovn-nbctl ls-add ls_test
ovn-nbctl --wait=sb sync

Reproduced on ovn20.09.0-10:

[root@dell-per740-12 bz1776712_broadcast_limit]# rpm -qa | grep ovn
ovn2.13-central-20.09.0-10.el7fdp.x86_64
kernel-kernel-networking-openvswitch-ovn-common-1.0-13.noarch
kernel-kernel-networking-openvswitch-ovn-regression-bz1787318_ovn_controller_crash-1.0-3.noarch
ovn2.13-20.09.0-10.el7fdp.x86_64
ovn2.13-host-20.09.0-10.el7fdp.x86_64
kernel-kernel-networking-openvswitch-ovn-regression-bz1776712_broadcast_limit-1.0-1.noarch

:: [ 01:55:33 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=sb sync' <=== stuck here

[root@dell-per740-12 bz1776712_broadcast_limit]# tail -10 /var/log/ovn/ovn-northd.log
2020-11-13T07:12:56.703Z|00127|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connecting...
2020-11-13T07:12:56.703Z|00128|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connected
2020-11-13T07:12:56.703Z|00129|ovn_northd|INFO|ovn-northd lock lost. This ovn-northd instance is now on standby.
2020-11-13T07:13:03.913Z|00130|ovn_northd|INFO|ovn-northd lock acquired.
This ovn-northd instance is now active. 2020-11-13T07:13:13.922Z|00131|reconnect|ERR|unix:/run/ovn/ovnsb_db.sock: no response to inactivity probe after 5 seconds, disconnecting 2020-11-13T07:13:13.922Z|00132|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connection dropped 2020-11-13T07:13:14.923Z|00133|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connecting... 2020-11-13T07:13:14.923Z|00134|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connected 2020-11-13T07:13:14.923Z|00135|ovn_northd|INFO|ovn-northd lock lost. This ovn-northd instance is now on standby. 2020-11-13T07:13:21.908Z|00136|ovn_northd|INFO|ovn-northd lock acquired. This ovn-northd instance is now active. <=== time out when connect, no response to inactivity probe after 5 seconds Verified on ovn20.09.0-11: :: [ 02:31:24 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=sb sync' :: [ 02:31:34 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=sb sync' (Expected 0, got 0) :: [ 02:31:34 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=hv sync' :: [ 02:57:46 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=hv sync' (Expected 0, got 0) :: [ 02:57:46 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.102 -c 1' PING 172.16.0.102 (172.16.0.102) 56(84) bytes of data. 64 bytes from 172.16.0.102: icmp_seq=1 ttl=62 time=97.2 ms --- 172.16.0.102 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 97.203/97.203/97.203/0.000 ms :: [ 02:57:46 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.102 -c 1' (Expected 0, got 0) :: [ 02:57:46 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.100 -c 1' PING 172.16.0.100 (172.16.0.100) 56(84) bytes of data. 
64 bytes from 172.16.0.100: icmp_seq=1 ttl=63 time=29.3 ms --- 172.16.0.100 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 29.370/29.370/29.370/0.000 ms :: [ 02:57:46 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.100 -c 1' (Expected 0, got 0) :: [ 02:58:16 ] :: [ BEGIN ] :: Running 'systemctl stop ovn-northd' :: [ 02:58:28 ] :: [ PASS ] :: Command 'systemctl stop ovn-northd' (Expected 0, got 0) :: [ 02:59:28 ] :: [ BEGIN ] :: Running 'systemctl start ovn-northd' :: [ 02:59:54 ] :: [ PASS ] :: Command 'systemctl start ovn-northd' (Expected 0, got 0) :: [ 02:59:54 ] :: [ BEGIN ] :: Running 'ovn-nbctl ls-add ls_test' :: [ 02:59:54 ] :: [ PASS ] :: Command 'ovn-nbctl ls-add ls_test' (Expected 0, got 0) :: [ 02:59:54 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=sb sync' :: [ 03:01:12 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=sb sync' (Expected 0, got 0) [root@dell-per740-12 bz1776712_broadcast_limit]# rpm -qa | grep -E "openvswitch|ovn" openvswitch-selinux-extra-policy-1.0-15.el7fdp.noarch ovn2.13-host-20.09.0-11.el7fdp.x86_64 kernel-kernel-networking-openvswitch-ovn-common-1.0-13.noarch openvswitch2.13-2.13.0-59.el7fdp.x86_64 kernel-kernel-networking-openvswitch-ovn-regression-bz1787318_ovn_controller_crash-1.0-3.noarch ovn2.13-central-20.09.0-11.el7fdp.x86_64 kernel-kernel-networking-openvswitch-ovn-regression-bz1776712_broadcast_limit-1.0-1.noarch python3-openvswitch2.13-2.13.0-59.el7fdp.x86_64 ovn2.13-20.09.0-11.el7fdp.x86_64
reproduced on ovn20.09.0-10.el8: [root@dell-per740-42 ~]# rpm -qa | grep -E "openvswitch|ovn" kernel-kernel-networking-openvswitch-ovn-regression-bz1776712_broadcast_limit-1.0-1.noarch openvswitch-selinux-extra-policy-1.0-23.el8fdp.noarch ovn2.13-central-20.09.0-10.el8fdp.x86_64 kernel-kernel-networking-openvswitch-ovn-common-1.0-13.noarch openvswitch2.13-2.13.0-70.el8fdp.x86_64 ovn2.13-20.09.0-10.el8fdp.x86_64 ovn2.13-host-20.09.0-10.el8fdp.x86_64 python3-openvswitch2.13-2.13.0-70.el8fdp.x86_64 [root@dell-per740-42 ~]# tail -f /var/log/ovn/ovn-northd.log 2020-11-16T03:28:16.117Z|00010|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connected 2020-11-16T03:28:16.117Z|00011|ovn_northd|INFO|ovn-northd lock lost. This ovn-northd instance is now on standby. 2020-11-16T03:28:16.479Z|00012|ovn_northd|INFO|ovn-northd lock acquired. This ovn-northd instance is now active. 2020-11-16T03:28:26.482Z|00013|reconnect|ERR|unix:/run/ovn/ovnsb_db.sock: no response to inactivity probe after 5 seconds, disconnecting 2020-11-16T03:28:26.482Z|00014|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connection dropped 2020-11-16T03:28:26.482Z|00015|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: waiting 2 seconds before reconnect 2020-11-16T03:28:28.485Z|00016|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connecting... 2020-11-16T03:28:28.485Z|00017|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connected 2020-11-16T03:28:28.485Z|00018|ovn_northd|INFO|ovn-northd lock lost. This ovn-northd instance is now on standby. 2020-11-16T03:28:38.349Z|00019|ovn_northd|INFO|ovn-northd lock acquired. This ovn-northd instance is now active. 
2020-11-16T03:28:48.356Z|00020|reconnect|ERR|unix:/run/ovn/ovnsb_db.sock: no response to inactivity probe after 5 seconds, disconnecting 2020-11-16T03:28:48.356Z|00021|reconnect|INFO|unix:/run/ovn/ovnsb_db.sock: connection dropped :: [ 22:16:00 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=hv sync' :: [ 22:26:12 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=hv sync' (Expected 0, got 0) :: [ 22:26:12 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.102 -c 1' PING 172.16.0.102 (172.16.0.102) 56(84) bytes of data. 64 bytes from 172.16.0.102: icmp_seq=1 ttl=62 time=57.6 ms --- 172.16.0.102 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 57.582/57.582/57.582/0.000 ms :: [ 22:26:12 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.102 -c 1' (Expected 0, got 0) :: [ 22:26:12 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.100 -c 1' PING 172.16.0.100 (172.16.0.100) 56(84) bytes of data. 64 bytes from 172.16.0.100: icmp_seq=1 ttl=63 time=17.7 ms --- 172.16.0.100 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 17.728/17.728/17.728/0.000 ms :: [ 22:26:12 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.100 -c 1' (Expected 0, got 0) :: [ 22:26:42 ] :: [ BEGIN ] :: Running 'systemctl stop ovn-northd' :: [ 22:26:50 ] :: [ PASS ] :: Command 'systemctl stop ovn-northd' (Expected 0, got 0) :: [ 22:27:50 ] :: [ BEGIN ] :: Running 'systemctl start ovn-northd' :: [ 22:28:05 ] :: [ PASS ] :: Command 'systemctl start ovn-northd' (Expected 0, got 0) :: [ 22:28:05 ] :: [ BEGIN ] :: Running 'ovn-nbctl ls-add ls_test' :: [ 22:28:05 ] :: [ PASS ] :: Command 'ovn-nbctl ls-add ls_test' (Expected 0, got 0) :: [ 22:28:05 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=sb sync' <=== stuck Verified on ovn20.09.0-11.el8: [root@dell-per740-42 ~]# rpm -qa | grep -E "openvswitch|ovn" 
kernel-kernel-networking-openvswitch-ovn-regression-bz1776712_broadcast_limit-1.0-1.noarch openvswitch-selinux-extra-policy-1.0-23.el8fdp.noarch ovn2.13-host-20.09.0-11.el8fdp.x86_64 kernel-kernel-networking-openvswitch-ovn-common-1.0-13.noarch openvswitch2.13-2.13.0-70.el8fdp.x86_64 ovn2.13-central-20.09.0-11.el8fdp.x86_64 python3-openvswitch2.13-2.13.0-70.el8fdp.x86_64 ovn2.13-20.09.0-11.el8fdp.x86_64 :: [ 22:40:50 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=sb sync' (Expected 0, got 0) :: [ 22:40:50 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=hv sync' :: [ 22:50:49 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=hv sync' (Expected 0, got 0) :: [ 22:50:49 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.102 -c 1' PING 172.16.0.102 (172.16.0.102) 56(84) bytes of data. 64 bytes from 172.16.0.102: icmp_seq=1 ttl=62 time=33.5 ms --- 172.16.0.102 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 33.518/33.518/33.518/0.000 ms :: [ 22:50:49 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.102 -c 1' (Expected 0, got 0) :: [ 22:50:49 ] :: [ BEGIN ] :: Running 'ip netns exec vm1 ping 172.16.0.100 -c 1' PING 172.16.0.100 (172.16.0.100) 56(84) bytes of data. 
64 bytes from 172.16.0.100: icmp_seq=1 ttl=63 time=18.2 ms --- 172.16.0.100 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 18.247/18.247/18.247/0.000 ms :: [ 22:50:49 ] :: [ PASS ] :: Command 'ip netns exec vm1 ping 172.16.0.100 -c 1' (Expected 0, got 0) :: [ 22:51:19 ] :: [ BEGIN ] :: Running 'systemctl stop ovn-northd' :: [ 22:51:26 ] :: [ PASS ] :: Command 'systemctl stop ovn-northd' (Expected 0, got 0) :: [ 22:52:26 ] :: [ BEGIN ] :: Running 'systemctl start ovn-northd' :: [ 22:52:43 ] :: [ PASS ] :: Command 'systemctl start ovn-northd' (Expected 0, got 0) :: [ 22:52:43 ] :: [ BEGIN ] :: Running 'ovn-nbctl ls-add ls_test' :: [ 22:52:43 ] :: [ PASS ] :: Command 'ovn-nbctl ls-add ls_test' (Expected 0, got 0) :: [ 22:52:43 ] :: [ BEGIN ] :: Running 'ovn-nbctl --wait=sb sync' :: [ 22:53:17 ] :: [ PASS ] :: Command 'ovn-nbctl --wait=sb sync' (Expected 0, got 0) <=== passed
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (ovn2.13 bug fix and enhancement update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:5308