OVS patch committed May 28 to master as 89b522aee379f7ebd21ec67ffb622118af7e9db1. OVN patch committed Jun 10 to master as 512b884dea3f85791eca44fd1d92956e8282be6d. Mark, should these get backported, or are some of them already downstream?
Patches backported in the FDN version - ovn2.13-20.06.1-16.el7fdn
Verified on ovn2.13-20.06.2-1.el8fdp.x86_64: setup raft: #master ctl_cmd="/usr/share/ovn/scripts/ovn-ctl" ip_s=1.1.1.18 ip_c1=1.1.1.19 ip_c2=1.1.1.20 $ctl_cmd --db-nb-addr=$ip_s --db-nb-create-insecure-remote=yes \ --db-sb-addr=$ip_s --db-sb-create-insecure-remote=yes \ --db-nb-cluster-local-addr=$ip_s --db-sb-cluster-local-addr=$ip_s \ --ovn-northd-nb-db=tcp:$ip_s:6641,tcp:$ip_c1:6641,tcp:$ip_c2:6641 \ --ovn-northd-sb-db=tcp:$ip_s:6642,tcp:$ip_c1:6642,tcp:$ip_c2:6642 start_northd #slave 1 ctl_cmd=/usr/share/ovn/scripts/ovn-ctl ip_s=1.1.1.18 ip_c1=1.1.1.19 ip_c2=1.1.1.20 $ctl_cmd --db-nb-addr=$ip_c1 --db-nb-create-insecure-remote=yes \ --db-sb-addr=$ip_c1 --db-sb-create-insecure-remote=yes \ --db-nb-cluster-local-addr=$ip_c1 --db-sb-cluster-local-addr=$ip_c1 \ --db-nb-cluster-remote-addr=$ip_s --db-sb-cluster-remote-addr=$ip_s \ --ovn-northd-nb-db=tcp:$ip_s:6641,tcp:$ip_c1:6641,tcp:$ip_c2:6641 \ --ovn-northd-sb-db=tcp:$ip_s:6642,tcp:$ip_c1:6642,tcp:$ip_c2:6642 start_northd #slave 2 ctl_cmd=/usr/share/ovn/scripts/ovn-ctl ip_s=1.1.1.18 ip_c1=1.1.1.19 ip_c2=1.1.1.20 $ctl_cmd --db-nb-addr=$ip_c2 --db-nb-create-insecure-remote=yes \ --db-sb-addr=$ip_c2 --db-sb-create-insecure-remote=yes \ --db-nb-cluster-local-addr=$ip_c2 --db-sb-cluster-local-addr=$ip_c2 \ --db-nb-cluster-remote-addr=$ip_s --db-sb-cluster-remote-addr=$ip_s \ --ovn-northd-nb-db=tcp:$ip_s:6641,tcp:$ip_c1:6641,tcp:$ip_c2:6641 \ --ovn-northd-sb-db=tcp:$ip_s:6642,tcp:$ip_c1:6642,tcp:$ip_c2:6642 start_northd start ovn-controller: ip_s=1.1.1.18 ip_c1=1.1.1.19 ip_c2=1.1.1.20 ip_r=1.1.1.21 systemctl start openvswitch ovs-vsctl set Open_vSwitch . external-ids:system-id=hv0 ovs-vsctl set Open_vSwitch . external-ids:ovn-remote="tcp:${ip_s}:6642,tcp:${ip_c1}:6642,tcp:${ip_c2}:6642" ovs-vsctl set Open_vSwitch . external-ids:ovn-encap-type=geneve ovs-vsctl set Open_vSwitch . 
external-ids:ovn-encap-ip=$ip_r systemctl start ovn-controller #chassis status [root@wsfd-advnetlab18 bz1829109]# ovn-sbctl show Chassis hv0 hostname: wsfd-advnetlab21.anl.lab.eng.bos.redhat.com Encap geneve ip: "1.1.1.21" options: {csum="true"} # rm sb db on all nodes and restart rm /etc/ovn/ovnsb_db.db -f ssh -q 1.1.1.19 rm /etc/ovn/ovnsb_db.db -f ssh -q 1.1.1.20 rm /etc/ovn/ovnsb_db.db -f $ctl_cmd --db-nb-addr=$ip_s --db-nb-create-insecure-remote=yes \ --db-sb-addr=$ip_s --db-sb-create-insecure-remote=yes \ --db-nb-cluster-local-addr=$ip_s --db-sb-cluster-local-addr=$ip_s \ --ovn-northd-nb-db=tcp:$ip_s:6641,tcp:$ip_c1:6641,tcp:$ip_c2:6641 \ --ovn-northd-sb-db=tcp:$ip_s:6642,tcp:$ip_c1:6642,tcp:$ip_c2:6642 restart_northd error on ovn-controller in /var/log/ovn/ovn-controller.log: 2020-08-25T06:47:01.954Z|00135|reconnect|INFO|tcp:1.1.1.19:6642: connected 2020-08-25T06:47:01.956Z|00136|ovsdb_idl|WARN|tcp:1.1.1.19:6642: clustered database server has stale data; trying another server 2020-08-25T06:47:09.962Z|00137|reconnect|INFO|tcp:1.1.1.18:6642: connected 2020-08-25T06:47:09.963Z|00138|ovsdb_idl|WARN|tcp:1.1.1.18:6642: clustered database server has stale data; trying another server 2020-08-25T06:47:17.967Z|00139|reconnect|INFO|tcp:1.1.1.20:6642: connected 2020-08-25T06:47:17.969Z|00140|ovsdb_idl|WARN|tcp:1.1.1.20:6642: clustered database server has stale data; trying another server # reset on ovn-controller ovn-appctl -t ovn-controller sb-cluster-state-reset controller is connected: 2020-08-25T06:48:05.997Z|00152|ovsdb_idl|WARN|tcp:1.1.1.20:6642: clustered database server has stale data; trying another server 2020-08-25T06:48:13.922Z|00153|main|INFO|Resetting southbound database cluster state 2020-08-25T06:48:13.997Z|00154|reconnect|INFO|tcp:1.1.1.19:6642: connected 2020-08-25T06:48:14.003Z|00155|main|INFO|OVNSB IDL reconnected, force recompute. 
[root@wsfd-advnetlab18 bz1829109]# ovn-sbctl show Chassis hv0 hostname: wsfd-advnetlab21.anl.lab.eng.bos.redhat.com Encap geneve ip: "1.1.1.21" options: {csum="true"}
also verify on rhel7 version: [root@wsfd-advnetlab21 bz1829109]# rpm -qa | grep -E "openvswitch|ovn" openvswitch2.13-2.13.0-41.el7fdp.x86_64 ovn2.13-20.06.2-1.el7fdp.x86_64 openvswitch-selinux-extra-policy-1.0-15.el7fdp.noarch ovn2.13-central-20.06.2-1.el7fdp.x86_64 ovn2.13-host-20.06.2-1.el7fdp.x86_64 2020-08-25T08:01:54.784Z|00037|reconnect|INFO|tcp:1.1.1.19:6642: connected 2020-08-25T08:01:54.786Z|00038|ovsdb_idl|INFO|tcp:1.1.1.19:6642: clustered database server is disconnected from cluster; trying another server 2020-08-25T08:01:54.786Z|00039|reconnect|INFO|tcp:1.1.1.19:6642: connection attempt timed out 2020-08-25T08:01:54.786Z|00040|reconnect|INFO|tcp:1.1.1.19:6642: continuing to reconnect in the background but suppressing further logging 2020-08-25T08:02:02.787Z|00041|reconnect|INFO|tcp:1.1.1.18:6642: connected 2020-08-25T08:02:02.789Z|00042|ovsdb_idl|WARN|tcp:1.1.1.18:6642: clustered database server has stale data; trying another server 2020-08-25T08:02:10.795Z|00043|reconnect|INFO|tcp:1.1.1.20:6642: connected 2020-08-25T08:02:10.796Z|00044|ovsdb_idl|INFO|tcp:1.1.1.20:6642: clustered database server is disconnected from cluster; trying another server 2020-08-25T08:02:18.804Z|00045|reconnect|INFO|tcp:1.1.1.19:6642: connected 2020-08-25T08:02:18.806Z|00046|ovsdb_idl|INFO|tcp:1.1.1.19:6642: clustered database server is disconnected from cluster; trying another server [root@wsfd-advnetlab21 bz1829109]# ovn-appctl -t ovn-controller sb-cluster-state-reset 2020-08-25T08:02:32.196Z|00049|main|INFO|Resetting southbound database cluster state 2020-08-25T08:02:34.816Z|00050|reconnect|INFO|tcp:1.1.1.20:6642: connected 2020-08-25T08:02:34.818Z|00051|ovsdb_idl|INFO|tcp:1.1.1.20:6642: clustered database server is disconnected from cluster; trying another server 2020-08-25T08:02:42.821Z|00052|reconnect|INFO|tcp:1.1.1.19:6642: connected 2020-08-25T08:02:42.825Z|00053|ovsdb_idl|INFO|tcp:1.1.1.19:6642: clustered database server is disconnected from cluster; trying another 
server 2020-08-25T08:02:50.833Z|00054|reconnect|INFO|tcp:1.1.1.18:6642: connected 2020-08-25T08:02:50.839Z|00055|main|INFO|OVNSB IDL reconnected, force recompute. [root@wsfd-advnetlab18 bz1829109]# ovn-sbctl show Chassis hv0 hostname: wsfd-advnetlab21.anl.lab.eng.bos.redhat.com Encap geneve ip: "1.1.1.21" options: {csum="true"}
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (ovn2.13 bug fix and enhancement update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:3769
The needinfo request[s] on this closed bug have been removed as they have been unresolved for 1000 days