Created attachment 1410305 [details] ovs-vswitchd.log Description of problem: Testing the fast forward upgrade(ffu) workflow in a dpdk environment with following steps: 1) fresh newton deployment with 3 virtual controllers and 2 baremetal computes 2) update ovs package and config on overcloud nodes manually[1] (ovs-2.6 to ovs-2.8), the changes can be found below at [1]. reboot the changed nodes. note: after rebooting in step 2, the vms created in this dpdk environment can talk with each other and external network via underlying ovs bridge and dpdk port. 3) undercloud ffu from newton to queens 4) overcloud ffu 5) check ovs-vsctl on overcloud compute nodes [root@overcloud-novacompute-1 ~]# cat /etc/sysconfig/network-scripts/ifcfg-br-link # This file is autogenerated by os-net-config DEVICE=br-link ONBOOT=yes HOTPLUG=no NM_CONTROLLED=no PEERDNS=no DEVICETYPE=ovs TYPE=OVSUserBridge OVS_EXTRA="set bridge br-link fail_mode=standalone" [root@overcloud-novacompute-1 ~]# cat /etc/sysconfig/network-scripts/ifcfg-dpdk0 # This file is autogenerated by os-net-config DEVICE=dpdk0 ONBOOT=yes HOTPLUG=no NM_CONTROLLED=no PEERDNS=no DEVICETYPE=ovs TYPE=OVSDPDKPort OVS_BRIDGE=br-link OVS_EXTRA="set Interface $DEVICE options:dpdk-devargs=0000:06:00.1" [root@overcloud-novacompute-1 ~]# ovs-vsctl get bridge br-link datapath_type system [root@overcloud-novacompute-1 ~]# ovs-vsctl get bridge br-int datapath_type system [root@overcloud-novacompute-1 ~]# ovs-vsctl show 405d9a38-88d0-4a0b-a93f-4f0b923293b9 Manager "ptcp:6640:127.0.0.1" is_connected: true Bridge br-int Controller "tcp:127.0.0.1:6633" is_connected: true fail_mode: secure Port "vhu79689427-c0" tag: 1 Interface "vhu79689427-c0" type: dpdkvhostuser error: "could not add network device vhu79689427-c0 to ofproto (Invalid argument)" Port "vhu79a24a2b-58" tag: 2 Interface "vhu79a24a2b-58" type: dpdkvhostuser error: "could not add network device vhu79a24a2b-58 to ofproto (Invalid argument)" Port "vhu94a59f60-74" tag: 2 Interface 
"vhu94a59f60-74" type: dpdkvhostuser error: "could not add network device vhu94a59f60-74 to ofproto (Invalid argument)" Port int-br-link Interface int-br-link type: patch options: {peer=phy-br-link} Port "vhu1216a977-05" tag: 1 Interface "vhu1216a977-05" type: dpdkvhostuser error: "could not add network device vhu1216a977-05 to ofproto (Invalid argument)" Port br-int Interface br-int type: internal Port patch-tun Interface patch-tun type: patch options: {peer=patch-int} Port int-br-ex Interface int-br-ex type: patch options: {peer=phy-br-ex} Bridge br-isolation fail_mode: standalone Port "vlan202" tag: 202 Interface "vlan202" type: internal Port br-isolation Interface br-isolation type: internal Port "vlan203" tag: 203 Interface "vlan203" type: internal Port "vlan201" tag: 201 Interface "vlan201" type: internal Port "p5p1" Interface "p5p1" Bridge br-tun Controller "tcp:127.0.0.1:6633" is_connected: true fail_mode: secure Port "vxlan-ac110213" Interface "vxlan-ac110213" type: vxlan options: {df_default="true", in_key=flow, local_ip="172.17.2.10", out_key=flow, remote_ip="172.17.2.19"} Port br-tun Interface br-tun type: internal Port "vxlan-ac11020e" Interface "vxlan-ac11020e" type: vxlan options: {df_default="true", in_key=flow, local_ip="172.17.2.10", out_key=flow, remote_ip="172.17.2.14"} Port patch-int Interface patch-int type: patch options: {peer=patch-tun} Port "vxlan-ac110212" Interface "vxlan-ac110212" type: vxlan options: {df_default="true", in_key=flow, local_ip="172.17.2.10", out_key=flow, remote_ip="172.17.2.18"} Port "vxlan-ac110214" Interface "vxlan-ac110214" type: vxlan options: {df_default="true", in_key=flow, local_ip="172.17.2.10", out_key=flow, remote_ip="172.17.2.20"} Bridge br-ex Controller "tcp:127.0.0.1:6633" is_connected: true fail_mode: secure Port br-ex Interface br-ex type: internal Port phy-br-ex Interface phy-br-ex type: patch options: {peer=int-br-ex} Bridge br-link Controller "tcp:127.0.0.1:6633" is_connected: true fail_mode: secure 
Port phy-br-link Interface phy-br-link type: patch options: {peer=int-br-link} Port "dpdk0" Interface "dpdk0" type: dpdk options: {dpdk-devargs="0000:06:00.1"} error: "could not add network device dpdk0 to ofproto (Invalid argument)" Port br-link Interface br-link type: internal ovs_version: "2.8.2" I tried to restart the openvswitch service with 'systemctl restart openvswitch', but the datapath type of the ovs bridges (br-int, br-link) is always set to system instead of netdev. [1]: manually changed the ovs config to 2.8 with the following ansible plays ansible play: https://github.com/zshi-redhat/ovs_upgrade/tree/master/tasks the changed configs are: a. the openvswitch user and hugetlbfs group are created first. b. set default OVS_USER_ID [root@overcloud-novacompute-1 ~]# cat /etc/openvswitch/default.conf OVS_USER_ID="openvswitch:hugetlbfs" c. change libvirt qemu.conf to group hugetlbfs add group='hugetlbfs' in qemu.conf note: this was overwritten during ffu. d. change the ownership of /var/run/openvswitch; /etc/openvswitch etc [root@overcloud-novacompute-1 ~]# ls -al /var/run/openvswitch/ total 312 drwxr-xr-x. 2 openvswitch hugetlbfs 420 Mar 20 08:50 . drwxr-xr-x. 56 root root 1620 Mar 20 08:50 .. srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-ex.mgmt srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-ex.snoop srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-int.mgmt srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-int.snoop srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-isolation.mgmt srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-isolation.snoop srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-link.mgmt srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-link.snoop srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-tun.mgmt srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 br-tun.snoop srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 db.sock srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 ovsdb-server.176963.ctl -rw-r--r--. 
1 openvswitch hugetlbfs 7 Mar 20 08:50 ovsdb-server.pid srwxr-x---. 1 openvswitch hugetlbfs 0 Mar 20 08:50 ovs-vswitchd.177013.ctl -rw-r--r--. 1 openvswitch hugetlbfs 7 Mar 20 08:50 ovs-vswitchd.pid srwxr-xr-x. 1 openvswitch hugetlbfs 0 Mar 20 08:50 pdump_server_socket -rw-r-----. 1 openvswitch hugetlbfs 208420 Mar 20 08:50 .rte_config -rw-r--r--. 1 openvswitch hugetlbfs 99456 Mar 20 08:50 .rte_hugepage_info srwxr-xr-x. 1 openvswitch hugetlbfs 0 Mar 20 08:50 .rte_mp_socket [root@overcloud-novacompute-1 ~]# ls -al /etc/openvswitch/ total 180 drwxr-xr-x. 2 openvswitch hugetlbfs 152 Mar 20 07:09 . drwxr-xr-x. 139 root root 8192 Mar 20 04:45 .. -rw-r--r--. 1 openvswitch hugetlbfs 106682 Mar 20 08:50 conf.db -rw-r--r--. 1 root root 50044 Mar 20 02:47 conf.db.backup7.14.0-3974332717 -rw-------. 1 openvswitch hugetlbfs 0 Mar 20 01:55 .conf.db.~lock~ -rw-------. 1 openvswitch hugetlbfs 0 Mar 20 02:47 .conf.db.tmp.~lock~ -rw-r--r--. 1 openvswitch hugetlbfs 175 Mar 20 02:44 default.conf -rw-r--r--. 1 openvswitch hugetlbfs 37 Mar 20 01:55 system-id.conf Version-Release number of selected component (if applicable): How reproducible: Steps to Reproduce: 1. 2. 3. Actual results: Expected results: Additional info:
I do not have the logs/configs to check, but I'm guessing the datapath_type config option was not set to netdev. https://github.com/openstack/neutron/blob/66c8ed9c973a9b6df727517f96df40d2ecf467d6/neutron/conf/plugins/ml2/drivers/ovs_conf.py#L74-L76 Or rather, I'm assuming it was overridden during FFU: the netdev value was removed, and the L2 agent is now using the default, system.
Any updates on this, or other thoughts on who should investigate?