Description of problem:

Create a cluster on Power with the cluster nodes deployed using an SRIOV network connection. Cluster creation completes and all nodes, pods, and cluster operators are healthy. Then migrate the CNI from openshift-sdn to OVN-Kubernetes per the instructions at https://docs.openshift.com/container-platform/4.6/networking/ovn_kubernetes_network_provider/migrate-from-openshift-sdn.html

Issue 1 - Many CSRs remain in Pending state and the nodes remain in NotReady state. Manual CSR approval is required multiple times; after that the nodes go to Ready state.

Issue 2 - Multiple ovnkube pods keep restarting and go into CrashLoopBackOff state on multiple nodes of the cluster.

Version-Release number of selected component (if applicable):
OCP 4.7

How reproducible:
Many times.

Steps to Reproduce:
1. Deploy a cluster on PowerVM with VMs having the SRIOV network type.
2. Migrate the CNI from the default openshift-sdn to OVNKubernetes (the core commands are sketched below).
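For reference, the heart of the linked migration procedure is the two patches below. This is an abridged sketch of the documented flow; the full procedure also covers the Machine Config Operator rollout, node reboots, and verification between the steps.

```
# Sketch (abridged from the linked 4.6 migration docs; follow the full
# procedure for the MCO rollout and verification between these steps).
oc patch Network.operator.openshift.io cluster --type='merge' \
  --patch '{"spec": {"migration": {"networkType": "OVNKubernetes"}}}'

# ...after the nodes have rebooted and the rollout has settled:
oc patch Network.config.openshift.io cluster --type='merge' \
  --patch '{"spec": {"networkType": "OVNKubernetes"}}'
```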
Actual results:

Issue 1 - Many CSRs remain in Pending state and the nodes remain in NotReady state. Manual CSR approval is required multiple times; after that the nodes go to Ready state.

```
[root@arch-ovn-bastion ~]# oc get csr | grep -i Pending
csr-lhzpv   112s    kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-n2gkz   4m10s   kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-tzpgd   60s     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending

[root@arch-ovn-bastion ~]# oc get csr | grep -i Pending
csr-2gksq   11m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-64snm   28m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-7wl4d   13m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-9fzcw   41m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-9p7v8   38m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-9wcv8   23m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-clm5l   50m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-f2jdb   26m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-fq9qz   5m10s   kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-h7fjq   54m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-k7z7c   11m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-l4cfz   20m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-lhzpv   57m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-m7p4j   26m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-n2gkz   59m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-pmlmk   7m51s   kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-q6glk   44m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-sc9g5   41m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-tzpgd   56m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending
csr-zqwrf   35m     kubernetes.io/kube-apiserver-client-kubelet   system:serviceaccount:openshift-machine-config-operator:node-bootstrapper   Pending

[root@arch-ovn-bastion ~]# oc get csr -ojson | jq -r '.items[] | select(.status == {} ) | .metadata.name' | xargs -r oc adm certificate approve
certificatesigningrequest.certificates.k8s.io/csr-2gksq approved
certificatesigningrequest.certificates.k8s.io/csr-64snm approved
certificatesigningrequest.certificates.k8s.io/csr-7wl4d approved
certificatesigningrequest.certificates.k8s.io/csr-9fzcw approved
certificatesigningrequest.certificates.k8s.io/csr-9p7v8 approved
certificatesigningrequest.certificates.k8s.io/csr-9wcv8 approved
certificatesigningrequest.certificates.k8s.io/csr-clm5l approved
certificatesigningrequest.certificates.k8s.io/csr-f2jdb approved
certificatesigningrequest.certificates.k8s.io/csr-fq9qz approved
certificatesigningrequest.certificates.k8s.io/csr-h7fjq approved
certificatesigningrequest.certificates.k8s.io/csr-k7z7c approved
certificatesigningrequest.certificates.k8s.io/csr-l4cfz approved
certificatesigningrequest.certificates.k8s.io/csr-lhzpv approved
certificatesigningrequest.certificates.k8s.io/csr-m7p4j approved
certificatesigningrequest.certificates.k8s.io/csr-n2gkz approved
certificatesigningrequest.certificates.k8s.io/csr-pmlmk approved
certificatesigningrequest.certificates.k8s.io/csr-q6glk approved
certificatesigningrequest.certificates.k8s.io/csr-sc9g5 approved
certificatesigningrequest.certificates.k8s.io/csr-tzpgd approved
certificatesigningrequest.certificates.k8s.io/csr-zqwrf approved

[root@arch-ovn-bastion ~]# oc get csr | grep -i Pending
csr-bf8dc   24s   kubernetes.io/kubelet-serving   system:node:master-0   Pending
csr-g7v52   30s   kubernetes.io/kubelet-serving   system:node:worker-1   Pending
csr-jh6gr   25s   kubernetes.io/kubelet-serving   system:node:worker-0   Pending
csr-ljc87   20s   kubernetes.io/kubelet-serving   system:node:master-1   Pending
csr-x278f   30s   kubernetes.io/kubelet-serving   system:node:master-2   Pending

[root@arch-ovn-bastion ~]# oc get csr -ojson | jq -r '.items[] | select(.status == {} ) | .metadata.name' | xargs -r oc adm certificate approve
certificatesigningrequest.certificates.k8s.io/csr-bf8dc approved
certificatesigningrequest.certificates.k8s.io/csr-g7v52 approved
certificatesigningrequest.certificates.k8s.io/csr-jh6gr approved
certificatesigningrequest.certificates.k8s.io/csr-ljc87 approved
certificatesigningrequest.certificates.k8s.io/csr-x278f approved

[root@arch-ovn-bastion ~]# oc get csr | grep -i Pending
[root@arch-ovn-bastion ~]# oc get nodes
NAME       STATUS   ROLES    AGE   VERSION
master-0   Ready    master   27h   v1.19.2+ad738ba
master-1   Ready    master   29h   v1.19.2+ad738ba
master-2   Ready    master   29h   v1.19.2+ad738ba
worker-0   Ready    worker   28h   v1.19.2+ad738ba
worker-1   Ready    worker   28h   v1.19.2+ad738ba
```
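The one-shot jq/xargs approval above has to be repeated as new CSRs are issued (first the kubelet client CSRs, then the serving CSRs). A small watch loop like the following (a sketch; assumes cluster-admin credentials and jq available on the bastion) keeps approving until no Pending CSRs remain:

```
# Sketch: repeatedly approve any CSR with an empty status (i.e. Pending),
# using the same jq filter as the command above. Stop once a pass finds none.
while true; do
  pending=$(oc get csr -o json | jq -r '.items[] | select(.status == {}) | .metadata.name')
  [ -z "${pending}" ] && break
  echo "${pending}" | xargs -r oc adm certificate approve
  sleep 30
done
```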
Issue 2 - Multiple ovnkube pods keep restarting and go into CrashLoopBackOff state on multiple nodes of the cluster.

```
[root@arch-ovn-bastion auth]# oc get pods -n openshift-ovn-kubernetes -o wide
NAME                   READY   STATUS             RESTARTS   AGE     IP           NODE       NOMINATED NODE   READINESS GATES
ovnkube-master-4gm8f   6/6     Running            1          24h     9.47.89.3    master-2   <none>           <none>
ovnkube-master-nfcpc   6/6     Running            0          6h21m   9.47.89.78   master-1   <none>           <none>
ovnkube-master-xd9fx   6/6     Running            0          6h21m   9.47.89.75   master-0   <none>           <none>
ovnkube-node-5rdnd     2/3     CrashLoopBackOff   224        24h     9.47.89.52   worker-1   <none>           <none>
ovnkube-node-9m85v     2/3     CrashLoopBackOff   76         24h     9.47.89.3    master-2   <none>           <none>
ovnkube-node-qw66k     2/3     CrashLoopBackOff   8          6h21m   9.47.89.78   master-1   <none>           <none>
ovnkube-node-whf6h     3/3     Running            0          6h21m   9.47.89.75   master-0   <none>           <none>
ovnkube-node-wmbqc     3/3     Running            0          6h21m   9.47.89.76   worker-0   <none>           <none>
ovs-node-8hk44         1/1     Running            0          24h     9.47.89.52   worker-1   <none>           <none>
ovs-node-gr797         1/1     Running            0          24h     9.47.89.3    master-2   <none>           <none>
ovs-node-k945c         1/1     Running            0          6h21m   9.47.89.78   master-1   <none>           <none>
ovs-node-mxbn4         1/1     Running            0          6h21m   9.47.89.75   master-0   <none>           <none>
ovs-node-r2qrc         1/1     Running            0          6h21m   9.47.89.76   worker-0   <none>           <none>
```
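The describe output below only captures the tail of the panic in Last State / Message; the full stack trace from the previous run of the crashing container can be pulled directly (a sketch, using the pod name from the listing above):

```
# Sketch: fetch the log of the previously terminated ovnkube-node container.
oc logs -n openshift-ovn-kubernetes ovnkube-node-qw66k -c ovnkube-node --previous
```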
```
[root@arch-ovn-bastion auth]# oc describe pod ovnkube-node-qw66k -n openshift-ovn-kubernetes
Name:                 ovnkube-node-qw66k
Namespace:            openshift-ovn-kubernetes
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 master-1/9.47.89.78
Start Time:           Fri, 18 Dec 2020 03:18:44 -0500
Labels:               app=ovnkube-node
                      component=network
                      controller-revision-hash=55b769bc94
                      kubernetes.io/os=linux
                      openshift.io/component=network
                      pod-template-generation=1
                      type=infra
Annotations:          <none>
Status:               Running
IP:                   9.47.89.78
IPs:
  IP:           9.47.89.78
Controlled By:  DaemonSet/ovnkube-node
Containers:
  ovn-controller:
    Container ID:  cri-o://8e2baade58b993fb05c31825b5bb71ba2445de122927d1f7acc04498d39c0911
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      set -e
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "$(date -Iseconds) - starting ovn-controller"
      exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \
        --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \
        -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
        -vconsole:"${OVN_LOG_LEVEL}"
    State:          Running
      Started:      Fri, 18 Dec 2020 09:19:34 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  300Mi
    Environment:
      OVN_LOG_LEVEL:  info
      K8S_NODE:       (v1:spec.nodeName)
    Mounts:
      /env from env-overrides (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn/ from run-ovn (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-xs2t8 (ro)
  kube-rbac-proxy:
    Container ID:  cri-o://f6dc3822aaa26ce0910ea48dd89c377bb88c2165e4a7401da53adffb810c93ea
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:c256d48a779cced38827050398eb8b752b6656094561db46f6a20a05aa5d9005
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:c256d48a779cced38827050398eb8b752b6656094561db46f6a20a05aa5d9005
    Port:          9103/TCP
    Host Port:     9103/TCP
    Command:
      /bin/bash
      -c
      #!/bin/bash
      set -euo pipefail
      TLS_PK=/etc/pki/tls/metrics-cert/tls.key
      TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
      # As the secret mount is optional we must wait for the files to be present.
      # The service is created in monitor.yaml and this is created in sdn.yaml.
      # If it isn't created there is probably an issue so we want to crashloop.
      retries=0
      TS=$(date +%s)
      WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
      HAS_LOGGED_INFO=0
      log_missing_certs(){
        CUR_TS=$(date +%s)
        if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then
          echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
        elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
          echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour.
          HAS_LOGGED_INFO=1
        fi
      }
      while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
        log_missing_certs
        sleep 5
      done
      echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy
      exec /usr/bin/kube-rbac-proxy \
        --logtostderr \
        --secure-listen-address=:9103 \
        --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
        --upstream=http://127.0.0.1:29103/ \
        --tls-private-key-file=${TLS_PK} \
        --tls-cert-file=${TLS_CERT}
    State:          Running
      Started:      Fri, 18 Dec 2020 09:19:35 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  20Mi
    Environment:  <none>
    Mounts:
      /etc/pki/tls/metrics-cert from ovn-node-metrics-cert (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-xs2t8 (ro)
  ovnkube-node:
    Container ID:  cri-o://cae0ed0206ef46276f5d7172713a72dbf8acc9dbf1d8823ca1bf3e8c4b618ad2
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2
    Port:          29103/TCP
    Host Port:     29103/TCP
    Command:
      /bin/bash
      -c
      set -xe
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses"
      cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/
      ovn_config_namespace=openshift-ovn-kubernetes
      echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port"
      iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK
      iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK
      retries=0
      while true; do
        # TODO: change to use '--request-timeout=30s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
        db_ip=$(timeout 30 kubectl get ep -n ${ovn_config_namespace} ovnkube-db -o jsonpath='{.subsets[0].addresses[0].ip}')
        if [[ -n "${db_ip}" ]]; then
          break
        fi
        (( retries += 1 ))
        if [[ "${retries}" -gt 40 ]]; then
          echo "E$(date "+%m%d %H:%M:%S.%N") - db endpoint never came up"
          exit 1
        fi
        echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db endpoint"
        sleep 5
      done
      echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}"
      gateway_mode_flags=
      # Check to see if ovs is provided by the node. This is only for upgrade from 4.5->4.6 or
      # openshift-sdn to ovn-kube conversion
      if grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service ; then
        gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
      else
        gateway_mode_flags="--gateway-mode local --gateway-interface none"
      fi
      exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \
        --nb-address "ssl:9.47.89.3:9641,ssl:9.47.89.75:9641,ssl:9.47.89.78:9641" \
        --sb-address "ssl:9.47.89.3:9642,ssl:9.47.89.75:9642,ssl:9.47.89.78:9642" \
        --nb-client-privkey /ovn-cert/tls.key \
        --nb-client-cert /ovn-cert/tls.crt \
        --nb-client-cacert /ovn-ca/ca-bundle.crt \
        --nb-cert-common-name "ovn" \
        --sb-client-privkey /ovn-cert/tls.key \
        --sb-client-cert /ovn-cert/tls.crt \
        --sb-client-cacert /ovn-ca/ca-bundle.crt \
        --sb-cert-common-name "ovn" \
        --config-file=/run/ovnkube-config/ovnkube.conf \
        --loglevel "${OVN_KUBE_LOG_LEVEL}" \
        --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \
        ${gateway_mode_flags} \
        --metrics-bind-address "127.0.0.1:29103"
    State:       Waiting
      Reason:    CrashLoopBackOff
    Last State:  Terminated
      Reason:    Error
      Message:   kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
        panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x10efb48]
goroutine 275 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x148
panic(0x12ed160, 0x21f7e20)
        /usr/lib/golang/src/runtime/panic.go:969 +0x148
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x0, 0x0, 0x20)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc000223788, 0xc000223700, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc000a12be0, 0xc0005e3f88, 0xc000a12be0, 0xc0003ce060)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc000223788, 0x5ba698, 0x7be88)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc0001dba60, 0xc0003ce0c0, 0xc000337f30)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
      Exit Code:    2
      Started:      Fri, 18 Dec 2020 09:36:23 -0500
      Finished:     Fri, 18 Dec 2020 09:36:26 -0500
    Ready:          False
    Restart Count:  8
    Requests:
      cpu:     10m
      memory:  300Mi
    Readiness:  exec [test -f /etc/cni/net.d/10-ovn-kubernetes.conf] delay=5s timeout=1s period=5s #success=1 #failure=3
    Environment:
      KUBERNETES_SERVICE_PORT:          6443
      KUBERNETES_SERVICE_HOST:          api-int.arch-ovn.redhat.com
      OVN_CONTROLLER_INACTIVITY_PROBE:  30000
      OVN_KUBE_LOG_LEVEL:               4
      K8S_NODE:                         (v1:spec.nodeName)
    Mounts:
      /cni-bin-dir from host-cni-bin (rw)
      /env from env-overrides (rw)
      /etc/cni/net.d from host-cni-netd (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /etc/systemd/system from systemd-units (ro)
      /host from host-slash (ro)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/netns from host-run-netns (ro)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn-kubernetes/ from host-run-ovn-kubernetes (rw)
      /run/ovn/ from run-ovn (rw)
      /run/ovnkube-config/ from ovnkube-config (rw)
      /var/lib/cni/networks/ovn-k8s-cni-overlay from host-var-lib-cni-networks-ovn-kubernetes (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-xs2t8 (ro)
Conditions:
  Type             Status
  Initialized      True
  Ready            False
  ContainersReady  False
  PodScheduled     True
Volumes:
  systemd-units:
    Type:          HostPath (bare host directory volume)
    Path:          /etc/systemd/system
    HostPathType:
  host-slash:
    Type:          HostPath (bare host directory volume)
    Path:          /
    HostPathType:
  host-run-netns:
    Type:          HostPath (bare host directory volume)
    Path:          /run/netns
    HostPathType:
  var-lib-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/data
    HostPathType:
  etc-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/etc
    HostPathType:
  run-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/openvswitch
    HostPathType:
  run-ovn:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/ovn
    HostPathType:
  host-run-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /run/ovn-kubernetes
    HostPathType:
  host-cni-bin:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/bin
    HostPathType:
  host-cni-netd:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/multus/cni/net.d
    HostPathType:
  host-var-lib-cni-networks-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/networks/ovn-k8s-cni-overlay
    HostPathType:
  ovnkube-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovnkube-config
    Optional:  false
  env-overrides:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      env-overrides
    Optional:  true
  ovn-ca:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovn-ca
    Optional:  false
  ovn-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-cert
    Optional:    false
  ovn-node-metrics-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-node-metrics-cert
    Optional:    true
  ovn-kubernetes-node-token-xs2t8:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-kubernetes-node-token-xs2t8
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  beta.kubernetes.io/os=linux
Tolerations:     op=Exists
Events:
  Type     Reason       Age                     From     Message
  ----     ------       ----                    ----     -------
  Normal   Pulled       127m (x55 over 6h22m)   kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2" already present on machine
  Warning  BackOff      27m (x1608 over 6h21m)  kubelet  Back-off restarting failed container
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "ovn-cert" : failed to sync secret cache: timed out waiting for the condition
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "env-overrides" : failed to sync configmap cache: timed out waiting for the condition
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "ovn-kubernetes-node-token-xs2t8" : failed to sync secret cache: timed out waiting for the condition
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "ovn-ca" : failed to sync configmap cache: timed out waiting for the condition
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "ovnkube-config" : failed to sync configmap cache: timed out waiting for the condition
  Warning  FailedMount  21m (x2 over 21m)       kubelet  MountVolume.SetUp failed for volume "ovn-node-metrics-cert" : failed to sync secret cache: timed out waiting for the condition
  Normal   Pulled       21m                     kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2" already present on machine
  Normal   Pulled       21m                     kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:c256d48a779cced38827050398eb8b752b6656094561db46f6a20a05aa5d9005" already present on machine
  Normal   Started      21m                     kubelet  Started container ovn-controller
  Normal   Created      21m                     kubelet  Created container ovn-controller
  Normal   Created      21m                     kubelet  Created container kube-rbac-proxy
  Normal   Started      21m                     kubelet  Started container kube-rbac-proxy
  Normal   Created      21m                     kubelet  Created container ovnkube-node
  Normal   Started      21m                     kubelet  Started container ovnkube-node
  Warning  Unhealthy    21m (x3 over 21m)       kubelet  Readiness probe failed:
  Normal   Pulled       21m (x2 over 21m)       kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:33ba26a139bd385a386d031e5c1dd59e689f4b2d9b86300ffc4f20a3870076e2" already present on machine
  Warning  BackOff      88s (x90 over 21m)      kubelet  Back-off restarting failed container
```

Expected results:
Manual CSR approval should not be required, and the ovnkube-node pods should be in Running state on all cluster nodes.

Additional info:
```
ovnkube-node-5rdnd   2/3   CrashLoopBackOff   224   24h     9.47.89.52   worker-1   <none>   <none>
ovnkube-node-9m85v   2/3   CrashLoopBackOff   76    24h     9.47.89.3    master-2   <none>   <none>
ovnkube-node-qw66k   2/3   CrashLoopBackOff   8     6h21m   9.47.89.78   master-1   <none>   <none>
```

Can you provide `oc describe pod` output for the other two crashing pods (ovnkube-node-5rdnd and ovnkube-node-9m85v) listed?
I have created a new cluster as I couldn't access one of the nodes on the earlier cluster.

```
# oc get nodes
NAME       STATUS   ROLES    AGE   VERSION
master-0   Ready    master   10d   v1.19.2+ad738ba
master-1   Ready    master   10d   v1.19.2+ad738ba
master-2   Ready    master   10d   v1.19.2+ad738ba
worker-0   Ready    worker   10d   v1.19.2+ad738ba
worker-1   Ready    worker   10d   v1.19.2+ad738ba

# oc get network.config/cluster -o jsonpath='{.status.networkType}{"\n"}'
OVNKubernetes

# oc get pods -A -owide | grep ovn
openshift-ovn-kubernetes   ovnkube-master-9262w   6/6   Running            2     31h   9.47.89.78   master-2   <none>   <none>
openshift-ovn-kubernetes   ovnkube-master-thsdt   6/6   Running            1     31h   9.47.89.3    master-0   <none>   <none>
openshift-ovn-kubernetes   ovnkube-master-xf574   6/6   Running            0     31h   9.47.89.52   master-1   <none>   <none>
openshift-ovn-kubernetes   ovnkube-node-9d724     2/3   CrashLoopBackOff   371   31h   9.47.89.3    master-0   <none>   <none>
openshift-ovn-kubernetes   ovnkube-node-btsqm     2/3   CrashLoopBackOff   371   31h   9.47.89.78   master-2   <none>   <none>
openshift-ovn-kubernetes   ovnkube-node-rwb59     2/3   CrashLoopBackOff   371   31h   9.47.89.74   worker-1   <none>   <none>
openshift-ovn-kubernetes   ovnkube-node-trn2f     2/3   CrashLoopBackOff   371   31h   9.47.89.77   worker-0   <none>   <none>
openshift-ovn-kubernetes   ovnkube-node-xtpkl     2/3   CrashLoopBackOff   370   31h   9.47.89.52   master-1   <none>   <none>
openshift-ovn-kubernetes   ovs-node-4zv8k         1/1   Running            0     31h   9.47.89.3    master-0   <none>   <none>
openshift-ovn-kubernetes   ovs-node-582dn         1/1   Running            0     31h   9.47.89.77   worker-0   <none>   <none>
openshift-ovn-kubernetes   ovs-node-nb26b         1/1   Running            0     30h   9.47.89.74   worker-1   <none>   <none>
openshift-ovn-kubernetes   ovs-node-q5vt7         1/1   Running            0     31h   9.47.89.78   master-2   <none>   <none>
openshift-ovn-kubernetes   ovs-node-qnxzq         1/1   Running            0     31h   9.47.89.52
```
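The per-pod details below were collected with `oc describe`. A loop like the following (a sketch; assumes cluster-admin access and uses the app=ovnkube-node label shown in the pod labels) gathers the describe output plus the previous crash log for every ovnkube-node pod in one pass:

```
# Sketch: dump describe output and the last crash log for each ovnkube-node pod.
for p in $(oc get pods -n openshift-ovn-kubernetes -l app=ovnkube-node -o name); do
  echo "===== ${p} ====="
  oc describe -n openshift-ovn-kubernetes "${p}"
  oc logs -n openshift-ovn-kubernetes "${p}" -c ovnkube-node --previous || true
done
```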
On master-0:

```
# oc describe pod ovnkube-node-9d724 -n openshift-ovn-kubernetes
Name:                 ovnkube-node-9d724
Namespace:            openshift-ovn-kubernetes
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 master-0/9.47.89.3
Start Time:           Thu, 31 Dec 2020 01:21:28 -0500
Labels:               app=ovnkube-node
                      component=network
                      controller-revision-hash=7f487f8c49
                      kubernetes.io/os=linux
                      openshift.io/component=network
                      pod-template-generation=1
                      type=infra
Annotations:          <none>
Status:               Running
IP:                   9.47.89.3
IPs:
  IP:           9.47.89.3
Controlled By:  DaemonSet/ovnkube-node
Containers:
  ovn-controller:
    Container ID:  cri-o://9e125bd86f1426a940f467106a41199bd05a0ff75267dcc43fdb5be22d84d6d2
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      set -e
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "$(date -Iseconds) - starting ovn-controller"
      exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \
        --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \
        -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
        -vconsole:"${OVN_LOG_LEVEL}"
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  300Mi
    Environment:
      OVN_LOG_LEVEL:  info
      K8S_NODE:       (v1:spec.nodeName)
    Mounts:
      /env from env-overrides (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn/ from run-ovn (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  kube-rbac-proxy:
    Container ID:  cri-o://e94e9111eb569b47a69760b658a420234a199c34e6fe62b825d7b40af5cc050c
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Port:          9103/TCP
    Host Port:     9103/TCP
    Command:
      /bin/bash
      -c
      #!/bin/bash
      set -euo pipefail
      TLS_PK=/etc/pki/tls/metrics-cert/tls.key
      TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
      # As the secret mount is optional we must wait for the files to be present.
      # The service is created in monitor.yaml and this is created in sdn.yaml.
      # If it isn't created there is probably an issue so we want to crashloop.
      retries=0
      TS=$(date +%s)
      WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
      HAS_LOGGED_INFO=0
      log_missing_certs(){
        CUR_TS=$(date +%s)
        if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then
          echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
        elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
          echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour.
          HAS_LOGGED_INFO=1
        fi
      }
      while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
        log_missing_certs
        sleep 5
      done
      echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy
      exec /usr/bin/kube-rbac-proxy \
        --logtostderr \
        --secure-listen-address=:9103 \
        --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
        --upstream=http://127.0.0.1:29103/ \
        --tls-private-key-file=${TLS_PK} \
        --tls-cert-file=${TLS_CERT}
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:30 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  20Mi
    Environment:  <none>
    Mounts:
      /etc/pki/tls/metrics-cert from ovn-node-metrics-cert (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  ovnkube-node:
    Container ID:  cri-o://355e74c078c31d91ca4c0e3c42c76f3d41bb3b21e8ed5257a550c21403eb3282
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          29103/TCP
    Host Port:     29103/TCP
    Command:
      /bin/bash
      -c
      set -xe
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses"
      cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/
      ovn_config_namespace=openshift-ovn-kubernetes
      echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port"
      iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK
      iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK
      retries=0
      while true; do
        # TODO: change to use '--request-timeout=30s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
        db_ip=$(timeout 30 kubectl get ep -n ${ovn_config_namespace} ovnkube-db -o jsonpath='{.subsets[0].addresses[0].ip}')
        if [[ -n "${db_ip}" ]]; then
          break
        fi
        (( retries += 1 ))
        if [[ "${retries}" -gt 40 ]]; then
          echo "E$(date "+%m%d %H:%M:%S.%N") - db endpoint never came up"
          exit 1
        fi
        echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db endpoint"
        sleep 5
      done
      echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}"
      gateway_mode_flags=
      # Check to see if ovs is provided by the node. This is only for upgrade from 4.5->4.6 or
      # openshift-sdn to ovn-kube conversion
      if grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service ; then
        gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
      else
        gateway_mode_flags="--gateway-mode local --gateway-interface none"
      fi
      exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \
        --nb-address "ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641" \
        --sb-address "ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642" \
        --nb-client-privkey /ovn-cert/tls.key \
        --nb-client-cert /ovn-cert/tls.crt \
        --nb-client-cacert /ovn-ca/ca-bundle.crt \
        --nb-cert-common-name "ovn" \
        --sb-client-privkey /ovn-cert/tls.key \
        --sb-client-cert /ovn-cert/tls.crt \
        --sb-client-cacert /ovn-ca/ca-bundle.crt \
        --sb-cert-common-name "ovn" \
        --config-file=/run/ovnkube-config/ovnkube.conf \
        --loglevel "${OVN_KUBE_LOG_LEVEL}" \
        --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \
        ${gateway_mode_flags} \
        --metrics-bind-address "127.0.0.1:29103"
    State:       Waiting
      Reason:    CrashLoopBackOff
    Last State:  Terminated
      Reason:    Error
      Message:   c/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
        panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x10f0328]
goroutine 261 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x148
panic(0x12ed1a0, 0x21f7e20)
        /usr/lib/golang/src/runtime/panic.go:969 +0x18c
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x0, 0x0, 0x20)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc000bba788, 0x0, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc000b9e640, 0xc00079df88, 0xc000b9e640, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc000bba788, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc000926b60, 0xc000451d40, 0xc000273770)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
      Exit Code:    2
      Started:      Fri, 01 Jan 2021 08:53:03 -0500
      Finished:     Fri, 01 Jan 2021 08:53:07 -0500
    Ready:          False
    Restart Count:  371
    Requests:
      cpu:     10m
      memory:  300Mi
    Readiness:  exec [test -f /etc/cni/net.d/10-ovn-kubernetes.conf] delay=5s timeout=1s period=5s #success=1 #failure=3
    Environment:
      KUBERNETES_SERVICE_PORT:          6443
      KUBERNETES_SERVICE_HOST:          api-int.arch-ovn.redhat.com
      OVN_CONTROLLER_INACTIVITY_PROBE:  30000
      OVN_KUBE_LOG_LEVEL:               4
      K8S_NODE:                         (v1:spec.nodeName)
    Mounts:
      /cni-bin-dir from host-cni-bin (rw)
      /env from env-overrides (rw)
      /etc/cni/net.d from host-cni-netd (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /etc/systemd/system from systemd-units (ro)
      /host from host-slash (ro)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/netns from host-run-netns (ro)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn-kubernetes/ from host-run-ovn-kubernetes (rw)
      /run/ovn/ from run-ovn (rw)
      /run/ovnkube-config/ from ovnkube-config (rw)
      /var/lib/cni/networks/ovn-k8s-cni-overlay from host-var-lib-cni-networks-ovn-kubernetes (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
Conditions:
  Type             Status
  Initialized      True
  Ready            False
  ContainersReady  False
  PodScheduled     True
Volumes:
  systemd-units:
    Type:          HostPath (bare host directory volume)
    Path:          /etc/systemd/system
    HostPathType:
  host-slash:
    Type:          HostPath (bare host directory volume)
    Path:          /
    HostPathType:
  host-run-netns:
    Type:          HostPath (bare host directory volume)
    Path:          /run/netns
    HostPathType:
  var-lib-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/data
    HostPathType:
  etc-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/etc
    HostPathType:
  run-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/openvswitch
    HostPathType:
  run-ovn:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/ovn
    HostPathType:
  host-run-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /run/ovn-kubernetes
    HostPathType:
  host-cni-bin:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/bin
    HostPathType:
  host-cni-netd:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/multus/cni/net.d
    HostPathType:
  host-var-lib-cni-networks-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/networks/ovn-k8s-cni-overlay
    HostPathType:
  ovnkube-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovnkube-config
    Optional:  false
  env-overrides:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      env-overrides
    Optional:  true
  ovn-ca:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovn-ca
    Optional:  false
  ovn-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-cert
    Optional:    false
  ovn-node-metrics-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-node-metrics-cert
    Optional:    true
  ovn-kubernetes-node-token-v5gvv:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-kubernetes-node-token-v5gvv
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  beta.kubernetes.io/os=linux
Tolerations:     op=Exists
Events:
  Type     Reason   Age                  From     Message
  ----     ------   ----                 ----     -------
  Normal   Pulled   49m (x363 over 31h)  kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a" already present on machine
  Warning  BackOff  4m37s (x8599 over 31h)  kubelet  Back-off restarting failed container
```

master-1:

```
[root@arch-ovn-bastion ~]# oc describe pod -n openshift-ovn-kubernetes ovnkube-node-xtpkl
Name:                 ovnkube-node-xtpkl
Namespace:            openshift-ovn-kubernetes
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 master-1/9.47.89.52
Start Time:           Thu, 31 Dec 2020 01:21:28 -0500
Labels:               app=ovnkube-node
                      component=network
                      controller-revision-hash=7f487f8c49
                      kubernetes.io/os=linux
                      openshift.io/component=network
                      pod-template-generation=1
                      type=infra
Annotations:          <none>
Status:               Running
IP:                   9.47.89.52
IPs:
  IP:           9.47.89.52
Controlled By:  DaemonSet/ovnkube-node
Containers:
  ovn-controller:
    Container ID:  cri-o://04b65557619ffbe626956d3ef008c6d0156e3da87806dfefea6e22376372a3c6
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      set -e
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "$(date -Iseconds) - starting ovn-controller"
      exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \
        --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \
        -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
        -vconsole:"${OVN_LOG_LEVEL}"
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  300Mi
    Environment:
      OVN_LOG_LEVEL:  info
      K8S_NODE:       (v1:spec.nodeName)
    Mounts:
      /env from env-overrides (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn/ from run-ovn (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  kube-rbac-proxy:
    Container ID:  cri-o://498936878c7db73e626b6819b52d24892ca7c171375d95e513d070f22e94293c
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Port:          9103/TCP
    Host Port:     9103/TCP
    Command:
      /bin/bash
      -c
      #!/bin/bash
      set -euo pipefail
      TLS_PK=/etc/pki/tls/metrics-cert/tls.key
      TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
      # As the secret mount is optional we must wait for the files to be present.
      # The service is created in monitor.yaml and this is created in sdn.yaml.
      # If it isn't created there is probably an issue so we want to crashloop.
      retries=0
      TS=$(date +%s)
      WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
      HAS_LOGGED_INFO=0
      log_missing_certs(){
        CUR_TS=$(date +%s)
        if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then
          echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
        elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
          echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour.
          HAS_LOGGED_INFO=1
        fi
      }
      while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
        log_missing_certs
        sleep 5
      done
      echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy
      exec /usr/bin/kube-rbac-proxy \
        --logtostderr \
        --secure-listen-address=:9103 \
        --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
        --upstream=http://127.0.0.1:29103/ \
        --tls-private-key-file=${TLS_PK} \
        --tls-cert-file=${TLS_CERT}
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  20Mi
    Environment:  <none>
    Mounts:
      /etc/pki/tls/metrics-cert from ovn-node-metrics-cert (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  ovnkube-node:
    Container ID:  cri-o://55b68e5710032c2bdb94e74b66e23320b79802c5b510136202dea100b63bc4e0
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          29103/TCP
    Host Port:     29103/TCP
    Command:
      /bin/bash
      -c
      set -xe
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses"
      cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/
      ovn_config_namespace=openshift-ovn-kubernetes
      echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port"
      iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK
      iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK
      retries=0
      while true; do
        # TODO: change to use '--request-timeout=30s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
        db_ip=$(timeout 30 kubectl get ep -n ${ovn_config_namespace} ovnkube-db -o jsonpath='{.subsets[0].addresses[0].ip}')
        if [[ -n "${db_ip}" ]]; then
          break
        fi
        (( retries += 1 ))
        if [[ "${retries}" -gt 40 ]]; then
          echo "E$(date "+%m%d %H:%M:%S.%N") - db endpoint never came up"
          exit 1
        fi
        echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db endpoint"
        sleep 5
      done
      echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}"
      gateway_mode_flags=
      # Check to see if ovs is provided by the node. This is only for upgrade from 4.5->4.6 or
      # openshift-sdn to ovn-kube conversion
      if grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service ; then
        gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
      else
        gateway_mode_flags="--gateway-mode local --gateway-interface none"
      fi
      exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \
        --nb-address "ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641" \
        --sb-address "ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642" \
        --nb-client-privkey /ovn-cert/tls.key \
        --nb-client-cert /ovn-cert/tls.crt \
        --nb-client-cacert /ovn-ca/ca-bundle.crt \
        --nb-cert-common-name "ovn" \
        --sb-client-privkey /ovn-cert/tls.key \
        --sb-client-cert /ovn-cert/tls.crt \
        --sb-client-cacert /ovn-ca/ca-bundle.crt \
        --sb-cert-common-name "ovn" \
        --config-file=/run/ovnkube-config/ovnkube.conf \
        --loglevel "${OVN_KUBE_LOG_LEVEL}" \
        --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \
        ${gateway_mode_flags} \
        --metrics-bind-address "127.0.0.1:29103"
    State:       Waiting
      Reason:    CrashLoopBackOff
    Last State:  Terminated
      Reason:    Error
      Message:   0x18c
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x14f94, 0xc000467020, 0x20)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc000928788, 0x613a66222c7d7b00, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc0003dadc0, 0xc000709f88, 0xc0003dadc0, 0xc0001863f0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc000928788, 0x5baab8, 0x7bf18)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc0003f5240, 0xc000450660, 0xc000124ab0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
      Exit Code:    2
      Started:      Fri, 01 Jan 2021 08:54:39 -0500
      Finished:     Fri, 01 Jan 2021 08:54:42 -0500
    Ready:          False
    Restart Count:  371
    Requests:
      cpu:     10m
      memory:  300Mi
    Readiness:  exec [test -f /etc/cni/net.d/10-ovn-kubernetes.conf] delay=5s timeout=1s period=5s #success=1 #failure=3
    Environment:
      KUBERNETES_SERVICE_PORT:          6443
      KUBERNETES_SERVICE_HOST:          api-int.arch-ovn.redhat.com
      OVN_CONTROLLER_INACTIVITY_PROBE:  30000
      OVN_KUBE_LOG_LEVEL:               4
      K8S_NODE:                         (v1:spec.nodeName)
    Mounts:
      /cni-bin-dir from host-cni-bin (rw)
      /env from env-overrides (rw)
      /etc/cni/net.d from host-cni-netd (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /etc/systemd/system from systemd-units (ro)
      /host from host-slash (ro)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/netns from host-run-netns (ro)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn-kubernetes/ from host-run-ovn-kubernetes (rw)
      /run/ovn/ from run-ovn (rw)
      /run/ovnkube-config/ from ovnkube-config (rw)
      /var/lib/cni/networks/ovn-k8s-cni-overlay from host-var-lib-cni-networks-ovn-kubernetes (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
Conditions:
  Type             Status
  Initialized      True
  Ready            False
  ContainersReady  False
  PodScheduled     True
Volumes:
  systemd-units:
    Type:          HostPath (bare host directory volume)
    Path:          /etc/systemd/system
    HostPathType:
  host-slash:
    Type:          HostPath (bare host directory volume)
    Path:          /
    HostPathType:
  host-run-netns:
    Type:          HostPath (bare host directory volume)
    Path:          /run/netns
    HostPathType:
  var-lib-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/data
    HostPathType:
  etc-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/etc
    HostPathType:
  run-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/openvswitch
    HostPathType:
  run-ovn:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/ovn
    HostPathType:
  host-run-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /run/ovn-kubernetes
    HostPathType:
  host-cni-bin:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/bin
    HostPathType:
  host-cni-netd:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/multus/cni/net.d
    HostPathType:
  host-var-lib-cni-networks-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/networks/ovn-k8s-cni-overlay
    HostPathType:
  ovnkube-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovnkube-config
    Optional:  false
  env-overrides:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      env-overrides
    Optional:  true
  ovn-ca:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovn-ca
    Optional:  false
  ovn-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-cert
    Optional:    false
  ovn-node-metrics-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-node-metrics-cert
    Optional:    true
  ovn-kubernetes-node-token-v5gvv:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-kubernetes-node-token-v5gvv
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  beta.kubernetes.io/os=linux
Tolerations:     op=Exists
Events:
  Type     Reason   Age                    From     Message
  ----     ------   ----                   ----     -------
  Normal   Created  106m (x352 over 31h)   kubelet  Created container ovnkube-node
  Normal   Pulled   101m (x353 over 31h)   kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a" already present on machine
  Warning  BackOff  73s (x8651 over 31h)   kubelet  Back-off restarting failed container
```

master-2:

```
[root@arch-ovn-bastion ~]# oc describe pod -n openshift-ovn-kubernetes ovnkube-node-btsqm
Name:                 ovnkube-node-btsqm
Namespace:            openshift-ovn-kubernetes
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 master-2/9.47.89.78
Start Time:           Thu, 31 Dec 2020 01:21:28 -0500
Labels:               app=ovnkube-node
                      component=network
                      controller-revision-hash=7f487f8c49
                      kubernetes.io/os=linux
                      openshift.io/component=network
                      pod-template-generation=1
                      type=infra
Annotations:          <none>
Status:               Running
IP:                   9.47.89.78
IPs:
  IP:           9.47.89.78
Controlled By:  DaemonSet/ovnkube-node
Containers:
  ovn-controller:
    Container ID:  cri-o://76e35641105636a0830190231271eff97827a1a398022c7ce3000a13aab6934f
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      set -e
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "$(date -Iseconds) - starting ovn-controller"
      exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \
        --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \
        -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
        -vconsole:"${OVN_LOG_LEVEL}"
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  300Mi
    Environment:
      OVN_LOG_LEVEL:  info
      K8S_NODE:       (v1:spec.nodeName)
    Mounts:
      /env from env-overrides (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn/ from run-ovn (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  kube-rbac-proxy:
    Container ID:  cri-o://0fbec497f9513e6525c0ec28c815b314070de106d1f1fd22d4657f52636e1621
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Port:          9103/TCP
    Host Port:     9103/TCP
    Command:
      /bin/bash
      -c
      #!/bin/bash
      set -euo pipefail
      TLS_PK=/etc/pki/tls/metrics-cert/tls.key
      TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
      # As the secret mount is optional we must wait for the files to be present.
      # The service is created in monitor.yaml and this is created in sdn.yaml.
      # If it isn't created there is probably an issue so we want to crashloop.
      retries=0
      TS=$(date +%s)
      WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
      HAS_LOGGED_INFO=0
      log_missing_certs(){
        CUR_TS=$(date +%s)
        if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then
          echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
        elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
          echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour.
          HAS_LOGGED_INFO=1
        fi
      }
      while [[ ! -f "${TLS_PK}" || ! -f "${TLS_CERT}" ]] ; do
        log_missing_certs
        sleep 5
      done
      echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy
      exec /usr/bin/kube-rbac-proxy \
        --logtostderr \
        --secure-listen-address=:9103 \
        --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \
        --upstream=http://127.0.0.1:29103/ \
        --tls-private-key-file=${TLS_PK} \
        --tls-cert-file=${TLS_CERT}
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  20Mi
    Environment:  <none>
    Mounts:
      /etc/pki/tls/metrics-cert from ovn-node-metrics-cert (ro)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  ovnkube-node:
    Container ID:  cri-o://88e9e70613ca93f66efc15c986d65fd6cd0c23ad97ad6a478fee1def0481c608
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          29103/TCP
    Host Port:     29103/TCP
    Command:
      /bin/bash
      -c
      set -xe
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses"
      cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/
      ovn_config_namespace=openshift-ovn-kubernetes
      echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port"
      iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK
      iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK
      retries=0
      while true; do
        # TODO: change to use '--request-timeout=30s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed.
        db_ip=$(timeout 30 kubectl get ep -n ${ovn_config_namespace} ovnkube-db -o jsonpath='{.subsets[0].addresses[0].ip}')
        if [[ -n "${db_ip}" ]]; then
          break
        fi
        (( retries += 1 ))
        if [[ "${retries}" -gt 40 ]]; then
          echo "E$(date "+%m%d %H:%M:%S.%N") - db endpoint never came up"
          exit 1
        fi
        echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db endpoint"
        sleep 5
      done
      echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}"
      gateway_mode_flags=
      # Check to see if ovs is provided by the node. This is only for upgrade from 4.5->4.6 or
      # openshift-sdn to ovn-kube conversion
      if grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service ; then
        gateway_mode_flags="--gateway-mode local --gateway-interface br-ex"
      else
        gateway_mode_flags="--gateway-mode local --gateway-interface none"
      fi
      exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \
        --nb-address "ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641" \
        --sb-address "ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642" \
        --nb-client-privkey /ovn-cert/tls.key \
        --nb-client-cert /ovn-cert/tls.crt \
        --nb-client-cacert /ovn-ca/ca-bundle.crt \
        --nb-cert-common-name "ovn" \
        --sb-client-privkey /ovn-cert/tls.key \
        --sb-client-cert /ovn-cert/tls.crt \
        --sb-client-cacert /ovn-ca/ca-bundle.crt \
        --sb-cert-common-name "ovn" \
        --config-file=/run/ovnkube-config/ovnkube.conf \
        --loglevel "${OVN_KUBE_LOG_LEVEL}" \
        --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \
        ${gateway_mode_flags} \
        --metrics-bind-address "127.0.0.1:29103"
    State:       Waiting
      Reason:    CrashLoopBackOff
    Last State:  Terminated
      Reason:    Error
      Message:   oller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
        panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x10f0328]
goroutine 298 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x148
panic(0x12ed1a0, 0x21f7e20)
        /usr/lib/golang/src/runtime/panic.go:969 +0x18c
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0xc0008322d0, 0x17281c0, 0x20)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc0001b4788, 0xc0003c8400, 0x0, 0x0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc0005ce2a0, 0xc000a0df88, 0xc0005ce2a0, 0xc0003dc038)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc0001b4788, 0xc0003dc038, 0x7bf18)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc000460620, 0xc0004481e0, 0xc0002cd2b0)
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
        /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
      Exit Code:    2
      Started:      Fri, 01 Jan 2021 08:57:37 -0500
      Finished:     Fri, 01 Jan 2021 08:57:39 -0500
    Ready:          False
    Restart Count:  372
    Requests:
      cpu:     10m
      memory:  300Mi
    Readiness:  exec [test -f /etc/cni/net.d/10-ovn-kubernetes.conf] delay=5s timeout=1s period=5s #success=1 #failure=3
    Environment:
      KUBERNETES_SERVICE_PORT:          6443
      KUBERNETES_SERVICE_HOST:          api-int.arch-ovn.redhat.com
      OVN_CONTROLLER_INACTIVITY_PROBE:  30000
      OVN_KUBE_LOG_LEVEL:               4
      K8S_NODE:                         (v1:spec.nodeName)
    Mounts:
      /cni-bin-dir from host-cni-bin (rw)
      /env from env-overrides (rw)
      /etc/cni/net.d from host-cni-netd (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /etc/systemd/system from systemd-units (ro)
      /host from host-slash (ro)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/netns from host-run-netns (ro)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn-kubernetes/ from host-run-ovn-kubernetes (rw)
      /run/ovn/ from run-ovn (rw)
      /run/ovnkube-config/ from ovnkube-config (rw)
      /var/lib/cni/networks/ovn-k8s-cni-overlay from host-var-lib-cni-networks-ovn-kubernetes (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
Conditions:
  Type             Status
  Initialized      True
  Ready            False
  ContainersReady  False
  PodScheduled     True
Volumes:
  systemd-units:
    Type:          HostPath (bare host directory volume)
    Path:          /etc/systemd/system
    HostPathType:
  host-slash:
    Type:          HostPath (bare host directory volume)
    Path:          /
    HostPathType:
  host-run-netns:
    Type:          HostPath (bare host directory volume)
    Path:          /run/netns
    HostPathType:
  var-lib-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/data
    HostPathType:
  etc-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/openvswitch/etc
    HostPathType:
  run-openvswitch:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/openvswitch
    HostPathType:
  run-ovn:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/ovn
    HostPathType:
  host-run-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /run/ovn-kubernetes
    HostPathType:
  host-cni-bin:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/bin
    HostPathType:
  host-cni-netd:
    Type:          HostPath (bare host directory volume)
    Path:          /var/run/multus/cni/net.d
    HostPathType:
  host-var-lib-cni-networks-ovn-kubernetes:
    Type:          HostPath (bare host directory volume)
    Path:          /var/lib/cni/networks/ovn-k8s-cni-overlay
    HostPathType:
  ovnkube-config:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovnkube-config
    Optional:  false
  env-overrides:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      env-overrides
    Optional:  true
  ovn-ca:
    Type:      ConfigMap (a volume populated by a ConfigMap)
    Name:      ovn-ca
    Optional:  false
  ovn-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-cert
    Optional:    false
  ovn-node-metrics-cert:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-node-metrics-cert
    Optional:    true
  ovn-kubernetes-node-token-v5gvv:
    Type:        Secret (a volume populated by a Secret)
    SecretName:  ovn-kubernetes-node-token-v5gvv
    Optional:    false
QoS Class:       Burstable
Node-Selectors:  beta.kubernetes.io/os=linux
Tolerations:     op=Exists
Events:
  Type     Reason   Age                   From     Message
  ----     ------   ----                  ----     -------
  Normal   Pulled   36m (x366 over 31h)   kubelet  Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a" already present on machine
  Warning  BackOff  106s (x8641 over 31h) kubelet  Back-off restarting failed container
```

Worker-0:

```
# oc describe pod -n openshift-ovn-kubernetes ovnkube-node-trn2f
Name:                 ovnkube-node-trn2f
Namespace:            openshift-ovn-kubernetes
Priority:             2000001000
Priority Class Name:  system-node-critical
Node:                 worker-0/9.47.89.77
Start Time:           Thu, 31 Dec 2020 01:21:28 -0500
Labels:               app=ovnkube-node
                      component=network
                      controller-revision-hash=7f487f8c49
                      kubernetes.io/os=linux
                      openshift.io/component=network
                      pod-template-generation=1
                      type=infra
Annotations:          <none>
Status:               Running
IP:                   9.47.89.77
IPs:
  IP:           9.47.89.77
Controlled By:  DaemonSet/ovnkube-node
Containers:
  ovn-controller:
    Container ID:  cri-o://7bd9e881616de75df195f49f2c8af9af979f68204287d04540529c6eeb4a8bb9
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      set -e
      if [[ -f "/env/${K8S_NODE}" ]]; then
        set -o allexport
        source "/env/${K8S_NODE}"
        set +o allexport
      fi
      echo "$(date -Iseconds) - starting ovn-controller"
      exec ovn-controller unix:/var/run/openvswitch/db.sock -vfile:off \
        --no-chdir --pidfile=/var/run/ovn/ovn-controller.pid \
        -p /ovn-cert/tls.key -c /ovn-cert/tls.crt -C /ovn-ca/ca-bundle.crt \
        -vconsole:"${OVN_LOG_LEVEL}"
    State:          Running
      Started:      Thu, 31 Dec 2020 01:21:29 -0500
    Ready:          True
    Restart Count:  0
    Requests:
      cpu:     10m
      memory:  300Mi
    Environment:
      OVN_LOG_LEVEL:  info
      K8S_NODE:       (v1:spec.nodeName)
    Mounts:
      /env from env-overrides (rw)
      /etc/openvswitch from etc-openvswitch (rw)
      /etc/ovn/ from etc-openvswitch (rw)
      /ovn-ca from ovn-ca (rw)
      /ovn-cert from ovn-cert (rw)
      /run/openvswitch from run-openvswitch (rw)
      /run/ovn/ from run-ovn (rw)
      /var/lib/openvswitch from var-lib-openvswitch (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro)
  kube-rbac-proxy:
    Container ID:  cri-o://f19fbba6381b9d6a7f97cb09783ffac05424527ad9ed831b0a2f718b96697665
    Image:         quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Image ID:      quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2d62df165387a21adf64ca0454122b72689ba4c53bfc13915be032a38cbaebce
    Port:          9103/TCP
    Host Port:     9103/TCP
    Command:
      /bin/bash
      -c
      #!/bin/bash
      set -euo pipefail
      TLS_PK=/etc/pki/tls/metrics-cert/tls.key
      TLS_CERT=/etc/pki/tls/metrics-cert/tls.crt
      # As the secret mount is optional we must wait for the files to be present.
      # The service is created in monitor.yaml and this is created in sdn.yaml.
      # If it isn't created there is probably an issue so we want to crashloop.
      retries=0
      TS=$(date +%s)
      WARN_TS=$(( ${TS} + $(( 20 * 60)) ))
      HAS_LOGGED_INFO=0
      log_missing_certs(){
        CUR_TS=$(date +%s)
        if [[ "${CUR_TS}" -gt "WARN_TS" ]]; then
          echo $(date -Iseconds) WARN: ovn-node-metrics-cert not mounted after 20 minutes.
        elif [[ "${HAS_LOGGED_INFO}" -eq 0 ]] ; then
          echo $(date -Iseconds) INFO: ovn-node-metrics-cert not mounted. Waiting one hour.
          HAS_LOGGED_INFO=1
        fi
      }
      while [[ ! -f "${TLS_PK}" || !
```
-f "${TLS_CERT}" ]] ; do log_missing_certs sleep 5 done echo $(date -Iseconds) INFO: ovn-node-metrics-certs mounted, starting kube-rbac-proxy exec /usr/bin/kube-rbac-proxy \ --logtostderr \ --secure-listen-address=:9103 \ --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 \ --upstream=http://127.0.0.1:29103/ \ --tls-private-key-file=${TLS_PK} \ --tls-cert-file=${TLS_CERT} State: Running Started: Thu, 31 Dec 2020 01:21:29 -0500 Ready: True Restart Count: 0 Requests: cpu: 10m memory: 20Mi Environment: <none> Mounts: /etc/pki/tls/metrics-cert from ovn-node-metrics-cert (ro) /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro) ovnkube-node: Container ID: cri-o://82b69108f18e236a0c95f03034d358ca20f261a78ae16b20a3b72ec225b2ae07 Image: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a Image ID: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a Port: 29103/TCP Host Port: 29103/TCP Command: /bin/bash -c set -xe if [[ -f "/env/${K8S_NODE}" ]]; then set -o allexport source "/env/${K8S_NODE}" set +o allexport fi echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db_ip addresses" cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/ ovn_config_namespace=openshift-ovn-kubernetes echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK retries=0 while true; do # TODO: change to use '--request-timeout=30s', if https://github.com/kubernetes/kubernetes/issues/49343 is fixed. db_ip=$(timeout 30 kubectl get ep -n ${ovn_config_namespace} ovnkube-db -o jsonpath='{.subsets[0].addresses[0].ip}') if [[ -n "${db_ip}" ]]; then break fi (( retries += 1 )) if [[ "${retries}" -gt 40 ]]; then echo "E$(date "+%m%d %H:%M:%S.%N") - db endpoint never came up" exit 1 fi echo "I$(date "+%m%d %H:%M:%S.%N") - waiting for db endpoint" sleep 5 done echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node db_ip ${db_ip}" gateway_mode_flags= # Check to see if ovs is provided by the node. 
This is only for upgrade from 4.5->4.6 or # openshift-sdn to ovn-kube conversion if grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service ; then gateway_mode_flags="--gateway-mode local --gateway-interface br-ex" else gateway_mode_flags="--gateway-mode local --gateway-interface none" fi exec /usr/bin/ovnkube --init-node "${K8S_NODE}" \ --nb-address "ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641" \ --sb-address "ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642" \ --nb-client-privkey /ovn-cert/tls.key \ --nb-client-cert /ovn-cert/tls.crt \ --nb-client-cacert /ovn-ca/ca-bundle.crt \ --nb-cert-common-name "ovn" \ --sb-client-privkey /ovn-cert/tls.key \ --sb-client-cert /ovn-cert/tls.crt \ --sb-client-cacert /ovn-ca/ca-bundle.crt \ --sb-cert-common-name "ovn" \ --config-file=/run/ovnkube-config/ovnkube.conf \ --loglevel "${OVN_KUBE_LOG_LEVEL}" \ --inactivity-probe="${OVN_CONTROLLER_INACTIVITY_PROBE}" \ ${gateway_mode_flags} \ --metrics-bind-address "127.0.0.1:29103" State: Waiting Reason: CrashLoopBackOff Last State: Terminated Reason: Error Message: -controller/pkg/node/startup-waiter.go:44 +0x8c created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc panic: runtime error: invalid memory address or nil pointer dereference [recovered] panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x10f0328] goroutine 271 [running]: k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x148 panic(0x12ed1a0, 0x21f7e20) /usr/lib/golang/src/runtime/panic.go:969 +0x18c github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x14f94, 0xc000304f40, 0x20) /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28 k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc00054bf88, 0xc0003d8400, 0x0, 0x0) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68 k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc00065fd80, 0xc000993f88, 0xc00065fd80, 0xc00061c4e8) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc00054bf88, 0x5baab8, 0x7bf18) /go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48 github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc0006192e0, 0xc0005015c0, 0xc00061b690) /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait /go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc Exit Code: 2 Started: Fri, 01 Jan 2021 09:02:40 -0500 Finished: Fri, 01 Jan 2021 09:02:42 -0500 Ready: False Restart Count: 373 Requests: cpu: 10m memory: 300Mi Readiness: exec [test -f /etc/cni/net.d/10-ovn-kubernetes.conf] delay=5s timeout=1s period=5s #success=1 #failure=3 Environment: KUBERNETES_SERVICE_PORT: 6443 KUBERNETES_SERVICE_HOST: api-int.arch-ovn.redhat.com OVN_CONTROLLER_INACTIVITY_PROBE: 30000 
OVN_KUBE_LOG_LEVEL: 4 K8S_NODE: (v1:spec.nodeName) Mounts: /cni-bin-dir from host-cni-bin (rw) /env from env-overrides (rw) /etc/cni/net.d from host-cni-netd (rw) /etc/openvswitch from etc-openvswitch (rw) /etc/ovn/ from etc-openvswitch (rw) /etc/systemd/system from systemd-units (ro) /host from host-slash (ro) /ovn-ca from ovn-ca (rw) /ovn-cert from ovn-cert (rw) /run/netns from host-run-netns (ro) /run/openvswitch from run-openvswitch (rw) /run/ovn-kubernetes/ from host-run-ovn-kubernetes (rw) /run/ovn/ from run-ovn (rw) /run/ovnkube-config/ from ovnkube-config (rw) /var/lib/cni/networks/ovn-k8s-cni-overlay from host-var-lib-cni-networks-ovn-kubernetes (rw) /var/lib/openvswitch from var-lib-openvswitch (rw) /var/run/secrets/kubernetes.io/serviceaccount from ovn-kubernetes-node-token-v5gvv (ro) Conditions: Type Status Initialized True Ready False ContainersReady False PodScheduled True Volumes: systemd-units: Type: HostPath (bare host directory volume) Path: /etc/systemd/system HostPathType: host-slash: Type: HostPath (bare host directory volume) Path: / HostPathType: host-run-netns: Type: HostPath (bare host directory volume) Path: /run/netns HostPathType: var-lib-openvswitch: Type: HostPath (bare host directory volume) Path: /var/lib/openvswitch/data HostPathType: etc-openvswitch: Type: HostPath (bare host directory volume) Path: /var/lib/openvswitch/etc HostPathType: run-openvswitch: Type: HostPath (bare host directory volume) Path: /var/run/openvswitch HostPathType: run-ovn: Type: HostPath (bare host directory volume) Path: /var/run/ovn HostPathType: host-run-ovn-kubernetes: Type: HostPath (bare host directory volume) Path: /run/ovn-kubernetes HostPathType: host-cni-bin: Type: HostPath (bare host directory volume) Path: /var/lib/cni/bin HostPathType: host-cni-netd: Type: HostPath (bare host directory volume) Path: /var/run/multus/cni/net.d HostPathType: host-var-lib-cni-networks-ovn-kubernetes: Type: HostPath (bare host directory volume) Path: /var/lib/cni/networks/ovn-k8s-cni-overlay HostPathType: ovnkube-config: Type: ConfigMap (a volume populated by a ConfigMap) Name: ovnkube-config Optional: false env-overrides: Type: ConfigMap (a volume populated by a ConfigMap) Name: env-overrides Optional: true ovn-ca: Type: ConfigMap (a volume populated by a ConfigMap) Name: ovn-ca Optional: false ovn-cert: Type: Secret (a volume populated by a Secret) SecretName: ovn-cert Optional: false ovn-node-metrics-cert: Type: Secret (a volume populated by a Secret) SecretName: ovn-node-metrics-cert Optional: true ovn-kubernetes-node-token-v5gvv: Type: Secret (a volume populated by a Secret) SecretName: ovn-kubernetes-node-token-v5gvv Optional: false QoS Class: Burstable Node-Selectors: beta.kubernetes.io/os=linux Tolerations: op=Exists Events: Type Reason Age From Message ---- ------ ---- ---- ------- Normal Pulled 39m (x367 over 31h) kubelet Container image "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:ca193198ee8c952f1cc5d73c47380cf9fe34bf3280f1e2a08c3d7a9317013c8a" already present on machine Warning BackOff 4m47s (x8654 over 31h) kubelet Back-off restarting failed container Unable to perform oc login # oc login -u kubeadmin Unable to connect to the server: EOF # oc logs ovnkube-node-rwb59 -n openshift-ovn-kubernetes ovn-controller error: You must be logged in to the server (the server has asked for the client to provide credentials ( pods/log ovnkube-node-rwb59))
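Since oc login and oc logs are failing at this point, the remaining option is to read the container logs directly on each node through the CRI-O CLI. A minimal sketch, assuming shell access to the node (container IDs will differ per node, and the /tmp file names are just illustrative):

```
# List all ovnkube-node containers on this node, including exited
# instances left behind by the crash loop, then dump each one's log.
for id in $(crictl ps -a --name ovnkube-node -q); do
  crictl logs "$id" > "/tmp/ovnkube-node-${id}.log" 2>&1
done
```

This is the same approach used a few comments below, where crictl logs was run on master-0.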
Can you provide the logs from the 5 crashing pods?
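A sketch for pulling those logs from every node in one pass, assuming SSH access as the core user (node names taken from the oc get nodes output above; adjust to the environment). The head -n1 keeps only the first container crictl lists, which is normally the most recent instance:

```
# Collect the latest ovnkube-node container log from each node.
for n in master-0 master-1 master-2 worker-0 worker-1; do
  ssh core@"$n" 'sudo crictl ps -a --name ovnkube-node -q | head -n1 | xargs -r sudo crictl logs' \
    > "${n}-ovnkube-node.log" 2>&1
done
```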
After digging further into the code, it appears that a wait task that should be getting added is not. Perhaps an error condition in either ../node/gateway_init.go or ../node/management-port.go occurs and returns before AddWait can be called. Logs from the crashing pods would help determine whether some error condition on this environment prevents the task from being added, leading to the nil pointer error. Passing this on to the OVNKube group for further analysis.
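In the meantime, a low-risk way to see what state the polled resources are in on a crashing node is to rerun the same queries the node initialization logs show (the exec(...) calls in the crictl output below). A sketch based on those logged commands, not an official diagnostic:

```
# The panic fires right after "Waiting for gateway and management port
# readiness...", so check the ports that wait task polls.
ovs-vsctl --timeout=15 --if-exists get interface ovn-k8s-mp0 ofport   # management port
ovs-vsctl --timeout=15 --if-exists get interface ovn-k8s-gw0 ofport   # local gateway port
ovs-vsctl list-br   # on a local-gateway node both br-int and br-local should exist

# Same test the ovnkube-node startup script uses to pick the gateway interface:
grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service \
  && echo "gateway-interface: br-ex" || echo "gateway-interface: none"
```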
The multi-arch bug triage team looked through the bugs and thinks this may be related to (or may be fixed by) BZ 1908231. This bug is also similar to BZ 1908076.
I'm unable to run must-gather or oc logs on the cluster, so I've tried to get the container logs directly from the node.

Unable to perform oc login

# oc login -u kubeadmin
Unable to connect to the server: EOF

# oc logs ovnkube-node-rwb59 -n openshift-ovn-kubernetes ovn-controller
error: You must be logged in to the server (the server has asked for the client to provide credentials ( pods/log ovnkube-node-rwb59))

# oc adm must-gather
[must-gather ] OUT the server is currently unable to handle the request (get imagestreams.image.openshift.io must-gather)
[must-gather ] OUT
[must-gather ] OUT Using must-gather plug-in image: registry.redhat.io/openshift4/ose-must-gather:latest
[must-gather ] OUT namespace/openshift-must-gather-tj29v created
[must-gather ] OUT clusterrolebinding.rbac.authorization.k8s.io/must-gather-h8gb9 created
[must-gather ] OUT pod for plug-in image registry.redhat.io/openshift4/ose-must-gather:latest created
[must-gather-j8g6q] OUT gather did not start: timed out waiting for the condition
[must-gather ] OUT clusterrolebinding.rbac.authorization.k8s.io/must-gather-h8gb9 deleted
[must-gather ] OUT namespace/openshift-must-gather-tj29v deleted
error: gather did not start for pod must-gather-j8g6q: timed out waiting for the condition

container logs for ovnkube-node on master-0:

[root@master-0 core]# crictl logs 22b377f65c1df
+ [[ -f /env/master-0 ]]
++ date '+%m%d %H:%M:%S.%N'
+ echo 'I0105 14:48:43.022263134 - waiting for db_ip addresses'
I0105 14:48:43.022263134 - waiting for db_ip addresses
+ cp -f /usr/libexec/cni/ovn-k8s-cni-overlay /cni-bin-dir/
+ ovn_config_namespace=openshift-ovn-kubernetes
++ date '+%m%d %H:%M:%S.%N'
+ echo 'I0105 14:48:43.155393669 - disable conntrack on geneve port'
I0105 14:48:43.155393669 - disable conntrack on geneve port
+ iptables -t raw -A PREROUTING -p udp --dport 6081 -j NOTRACK
+ iptables -t raw -A OUTPUT -p udp --dport 6081 -j NOTRACK
+ retries=0
+ true
++ timeout 30 kubectl get ep -n openshift-ovn-kubernetes ovnkube-db -o 'jsonpath={.subsets[0].addresses[0].ip}'
+ db_ip=9.47.89.3
+ [[ -n 9.47.89.3 ]]
+ break
++ date '+%m%d %H:%M:%S.%N'
+ echo 'I0105 14:48:44.699796108 - starting ovnkube-node db_ip 9.47.89.3'
I0105 14:48:44.699796108 - starting ovnkube-node db_ip 9.47.89.3
+ gateway_mode_flags=
+ grep -q OVNKubernetes /etc/systemd/system/ovs-configuration.service
+ gateway_mode_flags='--gateway-mode local --gateway-interface none'
+ exec /usr/bin/ovnkube --init-node master-0 --nb-address ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641 --sb-address ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642 --nb-client-privkey /ovn-cert/tls.key --nb-client-cert /ovn-cert/tls.crt --nb-client-cacert /ovn-ca/ca-bundle.crt --nb-cert-common-name ovn --sb-client-privkey /ovn-cert/tls.key --sb-client-cert /ovn-cert/tls.crt --sb-client-cacert /ovn-ca/ca-bundle.crt --sb-cert-common-name ovn --config-file=/run/ovnkube-config/ovnkube.conf --loglevel 4 --inactivity-probe=30000 --gateway-mode local --gateway-interface none --metrics-bind-address 127.0.0.1:29103
I0105 14:48:44.720529 204177 config.go:1306] Parsed config file /run/ovnkube-config/ovnkube.conf
I0105 14:48:44.720614 204177 config.go:1307] Parsed config: {Default:{MTU:1400 ConntrackZone:64000 EncapType:geneve EncapIP: EncapPort:6081 InactivityProbe:100000 OpenFlowProbe:180 RawClusterSubnets:10.128.0.0/14/23 ClusterSubnets:[]} Logging:{File: CNIFile: Level:4 LogFileMaxSize:100 LogFileMaxBackups:5 LogFileMaxAge:5} CNI:{ConfDir:/etc/cni/net.d Plugin:ovn-k8s-cni-overlay} OVNKubernetesFeature:{EnableEgressIP:true} Kubernetes:{Kubeconfig: CACert: APIServer:https://api-int.arch-ovn.redhat.com:6443 Token: CompatServiceCIDR: RawServiceCIDRs:172.30.0.0/16 ServiceCIDRs:[] OVNConfigNamespace:openshift-ovn-kubernetes MetricsBindAddress: OVNMetricsBindAddress: MetricsEnablePprof:false OVNEmptyLbEvents:false PodIP: RawNoHostSubnetNodes: NoHostSubnetNodes:nil} OvnNorth:{Address: PrivKey: Cert: CACert: CertCommonName: Scheme: northbound:false externalID: exec:<nil>} OvnSouth:{Address: PrivKey: Cert: CACert: CertCommonName: Scheme: northbound:false externalID: exec:<nil>} Gateway:{Mode:local Interface: NextHop: VLANID:0 NodeportEnable:true DisableSNATMultipleGWs:false V4JoinSubnet:100.64.0.0/16 V6JoinSubnet:fd98::/64} MasterHA:{ElectionLeaseDuration:60 ElectionRenewDeadline:30 ElectionRetryPeriod:20} HybridOverlay:{Enabled:false RawClusterSubnets: ClusterSubnets:[] VXLANPort:4789}}
I0105 14:48:44.725047 204177 ovnkube.go:347] Watching config file /run/ovnkube-config/ovnkube.conf for changes
I0105 14:48:44.725156 204177 ovnkube.go:347] Watching config file /run/ovnkube-config/..2020_12_31_06_21_28.971750638/ovnkube.conf for changes
I0105 14:48:44.765745 204177 reflector.go:219] Starting reflector *v1.Endpoints (0s) from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.765771 204177 reflector.go:255] Listing and watching *v1.Endpoints from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.765805 204177 reflector.go:219] Starting reflector *v1.Node (0s) from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.765820 204177 reflector.go:255] Listing and watching *v1.Node from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.765723 204177 reflector.go:219] Starting reflector *v1.Pod (0s) from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.765907 204177 reflector.go:255] Listing and watching *v1.Pod from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.766165 204177 reflector.go:219] Starting reflector *v1.Service (0s) from k8s.io/client-go/informers/factory.go:134
I0105 14:48:44.766196 204177 reflector.go:255] Listing and watching *v1.Service from k8s.io/client-go/informers/factory.go:134
I0105 14:48:45.464079 204177 shared_informer.go:270] caches populated
I0105 14:48:45.464109 204177 shared_informer.go:270] caches populated
I0105 14:48:46.064209 204177 shared_informer.go:270] caches populated
I0105 14:48:46.064290 204177 shared_informer.go:270] caches populated
I0105 14:48:46.064588 204177 config.go:927] exec: /usr/bin/ovs-vsctl --timeout=15 set Open_vSwitch . external_ids:ovn-nb="ssl:9.47.89.3:9641,ssl:9.47.89.52:9641,ssl:9.47.89.78:9641"
I0105 14:48:46.070274 204177 config.go:927] exec: /usr/bin/ovs-vsctl --timeout=15 del-ssl
I0105 14:48:46.120865 204177 config.go:927] exec: /usr/bin/ovs-vsctl --timeout=15 set-ssl /ovn-cert/tls.key /ovn-cert/tls.crt /ovn-ca/ca-bundle.crt
I0105 14:48:46.171754 204177 config.go:927] exec: /usr/bin/ovs-vsctl --timeout=15 set Open_vSwitch . external_ids:ovn-remote="ssl:9.47.89.3:9642,ssl:9.47.89.52:9642,ssl:9.47.89.78:9642"
I0105 14:48:46.190395 204177 ovs.go:166] exec(1): /usr/bin/ovs-vsctl --timeout=15 set Open_vSwitch . external_ids:ovn-encap-type=geneve external_ids:ovn-encap-ip=9.47.89.3 external_ids:ovn-remote-probe-interval=30000 external_ids:ovn-openflow-probe-interval=180 external_ids:hostname="master-0" external_ids:ovn-monitor-all=true
I0105 14:48:46.195457 204177 ovs.go:169] exec(1): stdout: ""
I0105 14:48:46.195500 204177 ovs.go:170] exec(1): stderr: ""
I0105 14:48:46.200595 204177 node.go:204] Node master-0 ready for ovn initialization with subnet 10.131.0.0/23
I0105 14:48:46.200756 204177 ovs.go:166] exec(2): /usr/bin/ovs-appctl --timeout=15 -t /var/run/ovn/ovn-controller.21104.ctl connection-status
I0105 14:48:46.205793 204177 ovs.go:169] exec(2): stdout: "connected\n"
I0105 14:48:46.205886 204177 ovs.go:170] exec(2): stderr: ""
I0105 14:48:46.205952 204177 node.go:118] Node master-0 connection status = connected
I0105 14:48:46.206028 204177 ovs.go:166] exec(3): /usr/bin/ovs-vsctl --timeout=15 -- br-exists br-int
I0105 14:48:46.213483 204177 ovs.go:169] exec(3): stdout: ""
I0105 14:48:46.213517 204177 ovs.go:170] exec(3): stderr: ""
I0105 14:48:46.213548 204177 ovs.go:166] exec(4): /usr/bin/ovs-ofctl dump-aggregate br-int
I0105 14:48:46.218721 204177 ovs.go:169] exec(4): stdout: "NXST_AGGREGATE reply (xid=0x4): packet_count=22900085 byte_count=14349475036 flow_count=3784\n"
I0105 14:48:46.218773 204177 ovs.go:170] exec(4): stderr: ""
I0105 14:48:46.218986 204177 ovs.go:166] exec(5): /usr/bin/ovs-vsctl --timeout=15 -- --if-exists del-port br-int k8s-master-0 -- --may-exist add-port br-int ovn-k8s-mp0 -- set interface ovn-k8s-mp0 type=internal mtu_request=1400 external-ids:iface-id=k8s-master-0
I0105 14:48:46.227483 204177 ovs.go:169] exec(5): stdout: ""
I0105 14:48:46.227564 204177 ovs.go:170] exec(5): stderr: ""
I0105 14:48:46.227625 204177 ovs.go:166] exec(6): /usr/bin/ovs-vsctl --timeout=15 --if-exists get interface ovn-k8s-mp0 mac_in_use
I0105 14:48:46.234086 204177 ovs.go:169] exec(6): stdout: "\"0a:64:ad:6a:e5:cc\"\n"
I0105 14:48:46.234119 204177 ovs.go:170] exec(6): stderr: ""
I0105 14:48:46.234155 204177 ovs.go:166] exec(7): /usr/bin/ovs-vsctl --timeout=15 set interface ovn-k8s-mp0 mac=0a\:64\:ad\:6a\:e5\:cc
I0105 14:48:46.239381 204177 ovs.go:169] exec(7): stdout: ""
I0105 14:48:46.239415 204177 ovs.go:170] exec(7): stderr: ""
I0105 14:48:46.288292 204177 gateway_init.go:162] Initializing Gateway Functionality
I0105 14:48:46.288753 204177 gateway_localnet.go:182] Node local addresses initialized to: map[10.131.0.2:{10.131.0.0 fffffe00} 127.0.0.1:{127.0.0.0 ff000000} 169.254.0.1:{169.254.0.0 fffff000} 9.47.89.3:{9.47.80.0 fffff000} ::1:{::1 ffffffffffffffffffffffffffffffff} fd55:faaf:e1ab:3ee:f816:3eff:fe22:6526:{fd55:faaf:e1ab:3ee:: ffffffffffffffff0000000000000000} fe80::44b4:abff:fe6d:6c4f:{fe80:: ffffffffffffffff0000000000000000} fe80::858:a9ff:fefe:1:{fe80:: ffffffffffffffff0000000000000000} fe80::864:adff:fe6a:e5cc:{fe80:: ffffffffffffffff0000000000000000} fe80::ec26:7fff:fe62:9dc6:{fe80:: ffffffffffffffff0000000000000000} fe80::f816:3eff:fe22:6526:{fe80:: ffffffffffffffff0000000000000000}]
I0105 14:48:46.289041 204177 gateway_init.go:191] Preparing Local Gateway
I0105 14:48:46.289069 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-p tcp -m tcp --dport 22623 -j REJECT" for protocol: 0
I0105 14:48:46.293012 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.298399 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-p tcp -m tcp --dport 22624 -j REJECT" for protocol: 0
I0105 14:48:46.302429 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.306669 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: OUTPUT with args: "-p tcp -m tcp --dport 22623 -j REJECT" for protocol: 0
I0105 14:48:46.310165 204177 gateway_iptables.go:46] Chain: "filter" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.314131 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: OUTPUT with args: "-p tcp -m tcp --dport 22624 -j REJECT" for protocol: 0
I0105 14:48:46.317712 204177 gateway_iptables.go:46] Chain: "filter" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.321807 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-i ovn-k8s-mp0 -j ACCEPT" for protocol: 0
I0105 14:48:46.325359 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.329386 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-o ovn-k8s-mp0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT" for protocol: 0
I0105 14:48:46.332858 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.337085 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: INPUT with args: "-i ovn-k8s-mp0 -m comment --comment from OVN to localhost -j ACCEPT" for protocol: 0
I0105 14:48:46.340650 204177 gateway_iptables.go:46] Chain: "filter" in table: "INPUT" already exists, skipping creation
I0105 14:48:46.344722 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: POSTROUTING with args: "-s 10.131.0.0/23 -j MASQUERADE" for protocol: 0
I0105 14:48:46.348353 204177 gateway_iptables.go:46] Chain: "nat" in table: "POSTROUTING" already exists, skipping creation
I0105 14:48:46.352229 204177 ovs.go:166] exec(8): /usr/bin/ovs-vsctl --timeout=15 --may-exist add-br br-local
I0105 14:48:46.360381 204177 ovs.go:169] exec(8): stdout: ""
I0105 14:48:46.360435 204177 ovs.go:170] exec(8): stderr: ""
I0105 14:48:46.360460 204177 ovs.go:166] exec(9): /usr/bin/ovs-vsctl --timeout=15 --if-exists get interface br-local mac_in_use
I0105 14:48:46.365282 204177 ovs.go:169] exec(9): stdout: "\"46:b4:ab:6d:6c:4f\"\n"
I0105 14:48:46.365312 204177 ovs.go:170] exec(9): stderr: ""
I0105 14:48:46.365345 204177 ovs.go:166] exec(10): /usr/bin/ovs-vsctl --timeout=15 set bridge br-local other-config:hwaddr=46:b4:ab:6d:6c:4f
I0105 14:48:46.369991 204177 ovs.go:169] exec(10): stdout: ""
I0105 14:48:46.370020 204177 ovs.go:170] exec(10): stderr: ""
I0105 14:48:46.370048 204177 ovs.go:166] exec(11): /usr/bin/ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . external_ids:ovn-bridge-mappings
I0105 14:48:46.374832 204177 ovs.go:169] exec(11): stdout: "\"locnet:br-local\"\n"
I0105 14:48:46.375066 204177 ovs.go:170] exec(11): stderr: ""
I0105 14:48:46.375315 204177 ovs.go:166] exec(12): /usr/bin/ovs-vsctl --timeout=15 set Open_vSwitch . external_ids:ovn-bridge-mappings=locnet:br-local
I0105 14:48:46.379972 204177 ovs.go:169] exec(12): stdout: ""
I0105 14:48:46.380003 204177 ovs.go:170] exec(12): stderr: ""
I0105 14:48:46.380209 204177 ovs.go:166] exec(13): /usr/bin/ovs-vsctl --timeout=15 --may-exist add-port br-local ovn-k8s-gw0 -- set interface ovn-k8s-gw0 type=internal mtu_request=1400 mac=0a\:58\:a9\:fe\:00\:01
I0105 14:48:46.387014 204177 ovs.go:169] exec(13): stdout: ""
I0105 14:48:46.387043 204177 ovs.go:170] exec(13): stderr: ""
I0105 14:48:46.400047 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-p tcp -m tcp --dport 22623 -j REJECT" for protocol: 0
I0105 14:48:46.405219 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.410738 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-p tcp -m tcp --dport 22624 -j REJECT" for protocol: 0
I0105 14:48:46.414957 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.419897 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: OUTPUT with args: "-p tcp -m tcp --dport 22623 -j REJECT" for protocol: 0
I0105 14:48:46.423863 204177 gateway_iptables.go:46] Chain: "filter" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.427998 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: OUTPUT with args: "-p tcp -m tcp --dport 22624 -j REJECT" for protocol: 0
I0105 14:48:46.432461 204177 gateway_iptables.go:46] Chain: "filter" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.436943 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-i ovn-k8s-gw0 -j ACCEPT" for protocol: 0
I0105 14:48:46.441088 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.446223 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-o ovn-k8s-gw0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT" for protocol: 0
I0105 14:48:46.451781 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.456241 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: INPUT with args: "-i ovn-k8s-gw0 -m comment --comment from OVN to localhost -j ACCEPT" for protocol: 0
I0105 14:48:46.461026 204177 gateway_iptables.go:46] Chain: "filter" in table: "INPUT" already exists, skipping creation
I0105 14:48:46.465156 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: POSTROUTING with args: "-s 169.254.0.1/20 -j MASQUERADE" for protocol: 0
I0105 14:48:46.468952 204177 gateway_iptables.go:46] Chain: "nat" in table: "POSTROUTING" already exists, skipping creation
I0105 14:48:46.473675 204177 ovs.go:166] exec(14): /usr/bin/ovs-vsctl --timeout=15 --if-exists get Open_vSwitch . external_ids:system-id
I0105 14:48:46.479504 204177 ovs.go:169] exec(14): stdout: "\"7b315658-14df-43e2-b250-bacdf05b5772\"\n"
I0105 14:48:46.479546 204177 ovs.go:170] exec(14): stderr: ""
I0105 14:48:46.480382 204177 gateway_localnet.go:182] Node local addresses initialized to: map[10.131.0.2:{10.131.0.0 fffffe00} 127.0.0.1:{127.0.0.0 ff000000} 169.254.0.1:{169.254.0.0 fffff000} 9.47.89.3:{9.47.80.0 fffff000} ::1:{::1 ffffffffffffffffffffffffffffffff} fd55:faaf:e1ab:3ee:f816:3eff:fe22:6526:{fd55:faaf:e1ab:3ee:: ffffffffffffffff0000000000000000} fe80::44b4:abff:fe6d:6c4f:{fe80:: ffffffffffffffff0000000000000000} fe80::858:a9ff:fefe:1:{fe80:: ffffffffffffffff0000000000000000} fe80::864:adff:fe6a:e5cc:{fe80:: ffffffffffffffff0000000000000000} fe80::ec26:7fff:fe62:9dc6:{fe80:: ffffffffffffffff0000000000000000} fe80::f816:3eff:fe22:6526:{fe80:: ffffffffffffffff0000000000000000}]
I0105 14:48:46.484307 204177 gateway_iptables.go:279] Chain: "nat" in table: "OVN-KUBE-NODEPORT" already exists, skipping creation
I0105 14:48:46.488041 204177 gateway_iptables.go:282] Chain: "filter" in table: "OVN-KUBE-NODEPORT" already exists, skipping creation
I0105 14:48:46.491587 204177 gateway_iptables.go:279] Chain: "nat" in table: "OVN-KUBE-EXTERNALIP" already exists, skipping creation
I0105 14:48:46.495376 204177 gateway_iptables.go:282] Chain: "filter" in table: "OVN-KUBE-EXTERNALIP" already exists, skipping creation
I0105 14:48:46.495438 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: PREROUTING with args: "-j OVN-KUBE-NODEPORT" for protocol: 0
I0105 14:48:46.500212 204177 gateway_iptables.go:46] Chain: "nat" in table: "PREROUTING" already exists, skipping creation
I0105 14:48:46.504343 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: OUTPUT with args: "-j OVN-KUBE-NODEPORT" for protocol: 0
I0105 14:48:46.507923 204177 gateway_iptables.go:46] Chain: "nat" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.511640 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-j OVN-KUBE-NODEPORT" for protocol: 0
I0105 14:48:46.515338 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.518978 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: PREROUTING with args: "-j OVN-KUBE-EXTERNALIP" for protocol: 0
I0105 14:48:46.522480 204177 gateway_iptables.go:46] Chain: "nat" in table: "PREROUTING" already exists, skipping creation
I0105 14:48:46.526117 204177 gateway_iptables.go:43] Adding rule in table: nat, chain: OUTPUT with args: "-j OVN-KUBE-EXTERNALIP" for protocol: 0
I0105 14:48:46.529845 204177 gateway_iptables.go:46] Chain: "nat" in table: "OUTPUT" already exists, skipping creation
I0105 14:48:46.533605 204177 gateway_iptables.go:43] Adding rule in table: filter, chain: FORWARD with args: "-j OVN-KUBE-EXTERNALIP" for protocol: 0
I0105 14:48:46.537328 204177 gateway_iptables.go:46] Chain: "filter" in table: "FORWARD" already exists, skipping creation
I0105 14:48:46.541157 204177 ovs.go:166] exec(15): /usr/sbin/ip rule
I0105 14:48:46.542875 204177 ovs.go:169] exec(15): stdout: "0:\tfrom all lookup local\n32765:\tfrom all lookup 6\n32766:\tfrom all lookup main\n32767:\tfrom all lookup default\n"
I0105 14:48:46.542914 204177 ovs.go:170] exec(15): stderr: ""
I0105 14:48:46.542962 204177 node.go:229] Waiting for gateway and management port readiness...
I0105 14:48:46.543102 204177 ovs.go:166] exec(16): /usr/bin/ovs-vsctl --timeout=15 --if-exists get interface ovn-k8s-mp0 ofport
E0105 14:48:46.543067 204177 runtime.go:78] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference)
goroutine 262 [running]:
k8s.io/apimachinery/pkg/util/runtime.logPanic(0x12ed1a0, 0x21f7e20)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:74 +0x84
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:48 +0x9c
panic(0x12ed1a0, 0x21f7e20)
	/usr/lib/golang/src/runtime/panic.go:969 +0x18c
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x14f94, 0xc000822c40, 0x20)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc00085c788, 0x43797469726f6900, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc000a040e0, 0xc000686f88, 0xc000a040e0, 0xc00054a0c0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc00085c788, 0x5baab8, 0x7bf18)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc000822a20, 0xc000aae1e0, 0xc0002e3610)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x10f0328]
goroutine 262 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x148
panic(0x12ed1a0, 0x21f7e20)
	/usr/lib/golang/src/runtime/panic.go:969 +0x18c
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1.1(0x14f94, 0xc000822c40, 0x20)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:45 +0x28
k8s.io/apimachinery/pkg/util/wait.runConditionWithCrashProtection(0xc00085c788, 0x43797469726f6900, 0x0, 0x0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:211 +0x68
k8s.io/apimachinery/pkg/util/wait.pollImmediateInternal(0xc000a040e0, 0xc000177f88, 0xc000a040e0, 0xc00054a0c0)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:445 +0x2c
k8s.io/apimachinery/pkg/util/wait.PollImmediate(0x1dcd6500, 0x45d964b800, 0xc00085c788, 0x5baab8, 0x7bf18)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:441 +0x48
github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait.func1(0xc000822a20, 0xc000aae1e0, 0xc0002e3610)
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:44 +0x8c
created by github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node.(*startupWaiter).Wait
	/go/src/github.com/openshift/ovn-kubernetes/go-controller/pkg/node/startup-waiter.go:42 +0xcc
[root@master-0 core]# 22b377f65c1df

I see the following in a loop in the container logs for ovnkube-master on master-0:

I1231 10:45:34.061835 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:46:05.519385 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 10:46:05.519413 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:46:46.189753 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 10:46:46.189778 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:47:09.967358 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 10:47:09.967481 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:47:37.942508 1 reflector.go:530] github.com/openshift/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/informers/externalversions/factory.go:117: Watch close - *v1.EgressIP total 0 items received
I1231 10:47:43.005787 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 10:47:43.005904 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:47:53.898420 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Pod total 63 items received
I1231 10:47:57.017575 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Namespace total 0 items received
I1231 10:48:25.178771 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Service total 0 items received
I1231 10:48:26.000157 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 10:48:26.000193 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 10:48:33.108533 1 reflector.go:530] k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions/factory.go:117: Watch close - *v1beta1.CustomResourceDefinition total 0 items received
W1231 10:48:33.110703 1 warnings.go:70] apiextensions.k8s.io/v1beta1 CustomResourceDefinition is deprecated in v1.16+, unavailable in v1.22+; use apiextensions.k8s.io/v1 CustomResourceDefinition
I1231 10:48:44.926987 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Node total 17 items received
I1231 10:48:45.958182 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Endpoints total 0 items received

ovn-controller logs on master-0:

[root@master-0 core]# crictl logs 9e125bd86f142
2020-12-31T06:21:29+00:00 - starting ovn-controller
2020-12-31T06:21:29Z|00001|reconnect|INFO|unix:/var/run/openvswitch/db.sock: connecting...
2020-12-31T06:21:29Z|00002|reconnect|INFO|unix:/var/run/openvswitch/db.sock: connected
2020-12-31T06:21:29Z|00003|main|INFO|OVN internal version is : [20.09.0-2.11.0-51.0]
2020-12-31T06:21:29Z|00004|main|INFO|OVS IDL reconnected, force recompute.
2020-12-31T06:21:29Z|00005|main|INFO|OVNSB IDL reconnected, force recompute.
2020-12-31T06:21:34Z|00006|reconnect|INFO|ssl:9.47.89.3:9642: connecting...
2020-12-31T06:21:34Z|00007|reconnect|INFO|ssl:9.47.89.3:9642: connected
2020-12-31T06:21:34Z|00008|chassis|INFO|Need to specify an encap type and ip
2020-12-31T06:21:34Z|00009|ofctrl|INFO|unix:/var/run/openvswitch/br-int.mgmt: connecting to switch
2020-12-31T06:21:34Z|00010|rconn|INFO|unix:/var/run/openvswitch/br-int.mgmt: connecting...
2020-12-31T06:21:34Z|00011|rconn|INFO|unix:/var/run/openvswitch/br-int.mgmt: connected
2020-12-31T06:21:34Z|00012|chassis|INFO|Need to specify an encap type and ip
2020-12-31T06:21:34Z|00013|chassis|INFO|Need to specify an encap type and ip
2020-12-31T06:21:34Z|00014|chassis|INFO|Need to specify an encap type and ip
2020-12-31T06:21:34Z|00001|pinctrl(ovn_pinctrl0)|INFO|unix:/var/run/openvswitch/br-int.mgmt: connecting to switch
2020-12-31T06:21:34Z|00002|rconn(ovn_pinctrl0)|INFO|unix:/var/run/openvswitch/br-int.mgmt: connecting...
2020-12-31T06:21:34Z|00003|rconn(ovn_pinctrl0)|INFO|unix:/var/run/openvswitch/br-int.mgmt: connected
2020-12-31T06:21:35Z|00015|binding|INFO|Claiming lport k8s-master-0 for this chassis.
2020-12-31T06:21:35Z|00016|binding|INFO|k8s-master-0: Claiming 0a:64:ad:6a:e5:cc 10.131.0.2

ovnkube-master logs on Master-1:

31 07:32:08.122894 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:32:37.669474 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Endpoints total 0 items received
I1231 07:32:49.284381 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:32:49.284406 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:33:17.826018 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Namespace total 0 items received
I1231 07:33:29.628114 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:33:29.628208 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:33:45.279763 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Pod total 52 items received
I1231 07:33:55.581416 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:33:55.581538 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:34:17.395097 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:34:17.395202 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:34:23.198067 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Service total 0 items received
I1231 07:34:49.443617 1 reflector.go:530] github.com/openshift/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/informers/externalversions/factory.go:117: Watch close - *v1.EgressIP total 0 items received
I1231 07:34:57.459814 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:34:57.459838 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:35:32.636802 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:35:32.636928 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:35:54.967597 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:35:54.967614 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:35:59.520155 1 reflector.go:530] k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions/factory.go:117: Watch close - *v1beta1.CustomResourceDefinition total 0 items received
W1231 07:35:59.521641 1 warnings.go:70] apiextensions.k8s.io/v1beta1 CustomResourceDefinition is deprecated in v1.16+, unavailable in v1.22+; use apiextensions.k8s.io/v1 CustomResourceDefinition
I1231 07:36:15.326957 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:36:15.326974 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:36:36.980331 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:36:36.980356 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:37:20.949515 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:37:20.949534 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:37:56.533859 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:37:56.533880 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:38:40.190212 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:38:40.190231 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:38:57.717002 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Node total 16 items received
I1231 07:39:20.230505 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:39:20.230529 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:39:56.126480 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:39:56.126502 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:40:37.563795 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.NetworkPolicy total 0 items received
I1231 07:40:39.085659 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:40:39.085800 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:41:03.533391 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:41:03.533415 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:41:46.765836 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:41:46.765854 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master
I1231 07:42:26.785454 1 leaderelection.go:346] lock is held by master-2 and has not yet expired
I1231 07:42:26.785545 1 leaderelection.go:248] failed to acquire lease openshift-ovn-kubernetes/ovn-kubernetes-master

ovnkube-master logs on Master-2:

E1231 23:44:55.728933 1 ovn.go:880] unable to use node for egress assignment, err: k8s.ovn.org/node-primary-ifaddr annotation not found for node "master-1"
I1231 23:44:58.629799 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Node total 19 items received
I1231 23:45:07.504775 1 ovn.go:574] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] retry pod setup
I1231 23:45:07.504895 1 ovn.go:580] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] setup retry failed; will try again later
I1231 23:45:07.504912 1 ovn.go:574] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] retry pod setup
I1231 23:45:07.504918 1 ovn.go:580] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] setup retry failed; will try again later
I1231 23:45:42.934853 1 reflector.go:530] github.com/openshift/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/informers/externalversions/factory.go:117: Watch close - *v1.EgressIP total 0 items received
I1231 23:46:07.505199 1 ovn.go:574] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] retry pod setup
I1231 23:46:07.505216 1 ovn.go:580] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] setup retry failed; will try again later
I1231 23:46:07.505229 1 ovn.go:574] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] retry pod setup
I1231 23:46:07.505234 1 ovn.go:580] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] setup retry failed; will try again later
I1231 23:46:37.633422 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.NetworkPolicy total 0 items received
I1231 23:47:07.505458 1 ovn.go:574] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] retry pod setup
I1231 23:47:07.505604 1 ovn.go:580] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] setup retry failed; will try again later
I1231 23:47:07.505625 1 ovn.go:574] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] retry pod setup
I1231 23:47:07.505633 1 ovn.go:580] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] setup retry failed; will try again later
I1231 23:47:54.161976 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Endpoints total 3 items received
E1231 23:48:00.018509 1 ovn.go:880] unable to use node for egress assignment, err: k8s.ovn.org/node-primary-ifaddr annotation not found for node "worker-0"
E1231 23:48:00.058476 1 ovn.go:880] unable to use node for egress assignment, err: k8s.ovn.org/node-primary-ifaddr annotation not found for node "master-2"
I1231 23:48:07.505937 1 ovn.go:574] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] retry pod setup
I1231 23:48:07.505961 1 ovn.go:580] [a370133e-938b-4c4d-9715-acb237f4d86d/openshift-image-registry/image-registry-558995999-wx8vc] setup retry failed; will try again later
I1231 23:48:07.505972 1 ovn.go:574] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] retry pod setup
I1231 23:48:07.505978 1 ovn.go:580] [49778fe3-292a-4ccb-9e8b-34fbe783e41c/openshift-image-registry/image-registry-f984dc4b6-dld6q] setup retry failed; will try again later
E1231 23:48:10.259857 1 ovn.go:880] unable to use node for egress assignment, err: k8s.ovn.org/node-primary-ifaddr annotation not found for node "master-0"
E1231 23:48:10.301462 1 ovn.go:880] unable to use node for egress assignment, err: k8s.ovn.org/node-primary-ifaddr annotation not found for node "worker-1"
I1231 23:48:28.083530 1 reflector.go:530] k8s.io/client-go/informers/factory.go:134: Watch close - *v1.Namespace total 0 items received
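Two follow-ups worth capturing once the API answers again, based on the master-2 errors above: whether the k8s.ovn.org/node-primary-ifaddr annotation ever gets set, and which master holds the ovn-kubernetes-master lock. A sketch; the leader-election lock is assumed to be published as a ConfigMap carrying the standard control-plane.alpha.kubernetes.io/leader annotation (the exact lock resource type can vary by release):

```
# Is the k8s.ovn.org/node-primary-ifaddr annotation set on each node?
# (empty output = still missing, matching the egress assignment errors)
for n in master-0 master-1 master-2 worker-0 worker-1; do
  printf '%s: ' "$n"
  oc get node "$n" -o jsonpath='{.metadata.annotations.k8s\.ovn\.org/node-primary-ifaddr}'
  echo
done

# Who currently holds the ovn-kubernetes-master lock (the logs say master-2)?
oc -n openshift-ovn-kubernetes get configmap ovn-kubernetes-master \
  -o jsonpath='{.metadata.annotations.control-plane\.alpha\.kubernetes\.io/leader}'
```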
*** This bug has been marked as a duplicate of bug 1908231 ***
The needinfo request[s] on this closed bug have been removed as they have been unresolved for 500 days