Bug 1858907
| Summary: | Panic in machine-config-operator when attempting to upgrade to 4.5.2 | ||
|---|---|---|---|
| Product: | OpenShift Container Platform | Reporter: | Kirsten Garrison <kgarriso> |
| Component: | Machine Config Operator | Assignee: | Antonio Murdaca <amurdaca> |
| Status: | CLOSED ERRATA | QA Contact: | Michael Nguyen <mnguyen> |
| Severity: | urgent | Docs Contact: | |
| Priority: | urgent | ||
| Version: | 4.5 | CC: | amurdaca, jbrooks, kechung, kgarriso, mifiedle, mnguyen, sdodson, veer, vpagar, walters, wking, xtian |
| Target Milestone: | --- | Keywords: | Upgrades |
| Target Release: | 4.5.z | ||
| Hardware: | x86_64 | ||
| OS: | Linux | ||
| Whiteboard: | |||
| Fixed In Version: | Doc Type: | If docs needed, set a value | |
| Doc Text: | Story Points: | --- | |
| Clone Of: | 1858026 | Environment: | |
| Last Closed: | 2020-07-30 18:56:59 UTC | Type: | --- |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
| Embargoed: | |||
| Bug Depends On: | 1858026 | ||
| Bug Blocks: | |||
[root@helper ~]# oc get infrastructure -o yaml
apiVersion: v1
items:
- apiVersion: config.openshift.io/v1
kind: Infrastructure
metadata:
creationTimestamp: "2020-07-24T15:04:20Z"
generation: 1
name: cluster
resourceVersion: "431"
selfLink: /apis/config.openshift.io/v1/infrastructures/cluster
uid: cb989152-5650-4139-9273-f2a45b879f46
spec:
cloudConfig:
name: ""
status:
apiServerInternalURI: https://api-int.ocp4.example.com:6443
apiServerURL: https://api.ocp4.example.com:6443
etcdDiscoveryDomain: ocp4.example.com
infrastructureName: ocp4-6m4hk
platform: None
platformStatus:
type: None
kind: List
metadata:
resourceVersion: ""
selfLink: ""
[root@helper ~]# oc edit infrastructure
infrastructure.config.openshift.io/cluster edited
[root@helper ~]# oc get infrastructure -o yaml
apiVersion: v1
items:
- apiVersion: config.openshift.io/v1
kind: Infrastructure
metadata:
creationTimestamp: "2020-07-24T15:04:20Z"
generation: 2
name: cluster
resourceVersion: "88888"
selfLink: /apis/config.openshift.io/v1/infrastructures/cluster
uid: cb989152-5650-4139-9273-f2a45b879f46
spec:
cloudConfig:
name: ""
status:
apiServerInternalURI: https://api-int.ocp4.example.com:6443
apiServerURL: https://api.ocp4.example.com:6443
etcdDiscoveryDomain: ocp4.example.com
infrastructureName: ocp4-6m4hk
platform: None
kind: List
metadata:
resourceVersion: ""
selfLink: ""
[root@helper ~]# oc adm upgrade --force --allow-explicit-upgrade --to-image=quay.io/openshift-release-dev/ocp-release:4.5.2-x86_64
Updating to release image quay.io/openshift-release-dev/ocp-release:4.5.2-x86_64
[root@helper ~]# watch oc get clusterversion
[root@helper ~]# oc get clusterversion
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
version 4.4.13 True True 13s Working towards 4.5.2: 1% complete
[root@helper ~]# watch oc get clusterversion
[root@helper ~]# oc get co
NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE
authentication 4.5.2 True False False 3h10m
cloud-credential 4.5.2 True False False 4h
cluster-autoscaler 4.5.2 True False False 3h46m
config-operator 4.5.2 True False False 37m
console 4.5.2 True False False 29m
csi-snapshot-controller 4.5.2 True False False 3h14m
dns 4.5.2 True False False 3h55m
etcd 4.5.2 True False False 3h54m
image-registry 4.5.2 True False False 3h48m
ingress 4.5.2 True False False 3h15m
insights 4.5.2 True False False 3h47m
kube-apiserver 4.5.2 True False False 3h54m
kube-controller-manager 4.5.2 True False False 3h54m
kube-scheduler 4.5.2 True False False 3h54m
kube-storage-version-migrator 4.5.2 True False False 3h15m
machine-api 4.5.2 True False False 3h47m
machine-approver 4.5.2 True False False 32m
machine-config 4.4.13 True True False 3h54m
marketplace 4.5.2 True False False 30m
monitoring 4.5.2 True False False 27m
network 4.5.2 True False False 3h56m
node-tuning 4.5.2 True False False 31m
openshift-apiserver 4.5.2 True False False 32m
openshift-controller-manager 4.5.2 True False False 31m
openshift-samples 4.5.2 True False False 30m
operator-lifecycle-manager 4.5.2 True False False 3h55m
operator-lifecycle-manager-catalog 4.5.2 True False False 3h55m
operator-lifecycle-manager-packageserver 4.5.2 True False False 29m
service-ca 4.5.2 True False False 3h56m
service-catalog-apiserver 4.4.13 True False False 3h56m
service-catalog-controller-manager 4.4.13 True False False 3h56m
storage 4.5.2 True False False 31m
[root@helper ~]# oc -n openshift-machine-config-operator get pods
NAME READY STATUS RESTARTS AGE
etcd-quorum-guard-54896968c-dnz56 1/1 Running 0 3h55m
etcd-quorum-guard-54896968c-lrqft 1/1 Running 0 3h55m
etcd-quorum-guard-54896968c-mvm9j 1/1 Running 0 3h55m
machine-config-controller-5b89ddfc68-24h6g 1/1 Running 1 3h56m
machine-config-daemon-b5crz 2/2 Running 0 3h57m
machine-config-daemon-fzvmt 2/2 Running 0 3h57m
machine-config-daemon-gxnx4 2/2 Running 0 3h17m
machine-config-daemon-s2h4n 2/2 Running 0 3h57m
machine-config-daemon-v8lwb 2/2 Running 0 3h17m
machine-config-operator-7974485c8b-g2986 1/1 Running 6 18m
machine-config-server-79hsh 1/1 Running 0 3h56m
machine-config-server-7k9v6 1/1 Running 0 3h56m
machine-config-server-8xvhm 1/1 Running 0 3h56m
[root@helper ~]# oc -n openshift-machine-config-operator logs machine-config-operator-7974485c8b-g2986 -p
I0724 19:00:54.328653 1 start.go:46] Version: 4.5.2 (Raw: v4.5.0-202007131801.p0-dirty, Hash: 4173030d89fbf4a7a0976d1665491a4d9a6e54f1)
I0724 19:00:54.334647 1 leaderelection.go:242] attempting to acquire leader lease openshift-machine-config-operator/machine-config...
E0724 19:02:52.339652 1 event.go:316] Could not construct reference to: '&v1.ConfigMap{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"machine-config", GenerateName:"", Namespace:"openshift-machine-config-operator", SelfLink:"/api/v1/namespaces/openshift-machine-config-operator/configmaps/machine-config", UID:"7a0de4ef-a55d-454c-ab4d-338027bf9853", ResourceVersion:"110190", Generation:0, CreationTimestamp:v1.Time{Time:time.Time{wall:0x0, ext:63731200148, loc:(*time.Location)(0x2530700)}}, DeletionTimestamp:(*v1.Time)(nil), DeletionGracePeriodSeconds:(*int64)(nil), Labels:map[string]string(nil), Annotations:map[string]string{"control-plane.alpha.kubernetes.io/leader":"{\"holderIdentity\":\"machine-config-operator-7974485c8b-g2986_5806b26e-c8ae-49b1-9758-f10530b3c384\",\"leaseDurationSeconds\":90,\"acquireTime\":\"2020-07-24T19:02:52Z\",\"renewTime\":\"2020-07-24T19:02:52Z\",\"leaderTransitions\":7}"}, OwnerReferences:[]v1.OwnerReference(nil), Finalizers:[]string(nil), ClusterName:"", ManagedFields:[]v1.ManagedFieldsEntry(nil)}, Immutable:(*bool)(nil), Data:map[string]string(nil), BinaryData:map[string][]uint8(nil)}' due to: 'no kind is registered for the type v1.ConfigMap in scheme "github.com/openshift/machine-config-operator/cmd/common/helpers.go:30"'. Will not report event: 'Normal' 'LeaderElection' 'machine-config-operator-7974485c8b-g2986_5806b26e-c8ae-49b1-9758-f10530b3c384 became leader'
I0724 19:02:52.340151 1 leaderelection.go:252] successfully acquired lease openshift-machine-config-operator/machine-config
I0724 19:02:53.022397 1 operator.go:265] Starting MachineConfigOperator
E0724 19:02:53.051227 1 runtime.go:78] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference)
goroutine 252 [running]:
k8s.io/apimachinery/pkg/util/runtime.logPanic(0x1577500, 0x25113c0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:74 +0xa3
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:48 +0x82
panic(0x1577500, 0x25113c0)
/opt/rh/go-toolset-1.13/root/usr/lib/go-toolset-1.13-golang/src/runtime/panic.go:679 +0x1b2
github.com/openshift/machine-config-operator/pkg/operator.isCloudConfigRequired(...)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:105
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncCloudConfig(0xc0000d2000, 0xc000caf320, 0xc0002e3040, 0x8, 0xe)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:120 +0x237
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncRenderConfig(0xc0000d2000, 0x0, 0xc0023e0431, 0xce0e10ba32c)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:255 +0x865
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncAll(0xc0000d2000, 0xc0005b5c98, 0x6, 0x6, 0xc0005e6c01, 0x413713)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:59 +0x177
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).sync(0xc0000d2000, 0xc0004e2390, 0x30, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:357 +0x37e
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).processNextWorkItem(0xc0000d2000, 0xc0006ad600)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:313 +0x102
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).worker(0xc0000d2000)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:302 +0x2b
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc0006df620)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:155 +0x5e
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc0006df620, 0x19960e0, 0xc000647e30, 0xc0000ca001, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:156 +0xa3
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc0006df620, 0x3b9aca00, 0x0, 0x1, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133 +0xe2
k8s.io/apimachinery/pkg/util/wait.Until(0xc0006df620, 0x3b9aca00, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:90 +0x4d
created by github.com/openshift/machine-config-operator/pkg/operator.(*Operator).Run
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:271 +0x41f
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x13faff7]
goroutine 252 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x105
panic(0x1577500, 0x25113c0)
/opt/rh/go-toolset-1.13/root/usr/lib/go-toolset-1.13-golang/src/runtime/panic.go:679 +0x1b2
github.com/openshift/machine-config-operator/pkg/operator.isCloudConfigRequired(...)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:105
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncCloudConfig(0xc0000d2000, 0xc000caf320, 0xc0002e3040, 0x8, 0xe)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:120 +0x237
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncRenderConfig(0xc0000d2000, 0x0, 0xc0023e0431, 0xce0e10ba32c)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:255 +0x865
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncAll(0xc0000d2000, 0xc0005b5c98, 0x6, 0x6, 0xc0005e6c01, 0x413713)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:59 +0x177
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).sync(0xc0000d2000, 0xc0004e2390, 0x30, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:357 +0x37e
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).processNextWorkItem(0xc0000d2000, 0xc0006ad600)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:313 +0x102
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).worker(0xc0000d2000)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:302 +0x2b
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc0006df620)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:155 +0x5e
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc0006df620, 0x19960e0, 0xc000647e30, 0xc0000ca001, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:156 +0xa3
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc0006df620, 0x3b9aca00, 0x0, 0x1, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133 +0xe2
k8s.io/apimachinery/pkg/util/wait.Until(0xc0006df620, 0x3b9aca00, 0xc00015e120)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:90 +0x4d
created by github.com/openshift/machine-config-operator/pkg/operator.(*Operator).Run
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:271 +0x41f
[root@helper ~]# oc -n openshift-machine-config-operator logs machine-config-operator-7974485c8b-g2986
I0724 19:05:43.401200 1 start.go:46] Version: 4.5.2 (Raw: v4.5.0-202007131801.p0-dirty, Hash: 4173030d89fbf4a7a0976d1665491a4d9a6e54f1)
I0724 19:05:43.404600 1 leaderelection.go:242] attempting to acquire leader lease openshift-machine-config-operator/machine-config...
[root@helper ~]# oc -n openshift-machine-config-operator get pods
NAME READY STATUS RESTARTS AGE
etcd-quorum-guard-54896968c-dnz56 1/1 Running 0 3h56m
etcd-quorum-guard-54896968c-lrqft 1/1 Running 0 3h56m
etcd-quorum-guard-54896968c-mvm9j 1/1 Running 0 3h56m
machine-config-controller-5b89ddfc68-24h6g 1/1 Running 1 3h56m
machine-config-daemon-b5crz 2/2 Running 0 3h57m
machine-config-daemon-fzvmt 2/2 Running 0 3h57m
machine-config-daemon-gxnx4 2/2 Running 0 3h18m
machine-config-daemon-s2h4n 2/2 Running 0 3h57m
machine-config-daemon-v8lwb 2/2 Running 0 3h18m
machine-config-operator-7974485c8b-g2986 1/1 Running 6 19m
machine-config-server-79hsh 1/1 Running 0 3h56m
machine-config-server-7k9v6 1/1 Running 0 3h56m
machine-config-server-8xvhm 1/1 Running 0 3h56m
[root@helper ~]# oc get infrastructure -o yaml
apiVersion: v1
items:
- apiVersion: config.openshift.io/v1
kind: Infrastructure
metadata:
creationTimestamp: "2020-07-24T15:04:20Z"
generation: 2
name: cluster
resourceVersion: "88888"
selfLink: /apis/config.openshift.io/v1/infrastructures/cluster
uid: cb989152-5650-4139-9273-f2a45b879f46
spec:
cloudConfig:
name: ""
status:
apiServerInternalURI: https://api-int.ocp4.example.com:6443
apiServerURL: https://api.ocp4.example.com:6443
etcdDiscoveryDomain: ocp4.example.com
infrastructureName: ocp4-6m4hk
platform: None
kind: List
metadata:
resourceVersion: ""
selfLink: ""
[root@helper ~]# oc get clusterversion
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
version 4.4.13 True True 43m Unable to apply 4.5.2: the cluster operator machine-config has not yet successfully rolled out
[root@helper ~]# oc adm upgrade --force --to-image=registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850 --allow-explicit-upgrade
Updating to release image registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850
[root@helper ~]# oc get clusterversion
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
version 4.4.13 True True 44m Working towards registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850: downloading update
[root@helper ~]# watch oc get clusterversion
[root@helper ~]# oc get clusterversion
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
version 4.5.0-0.nightly-2020-07-24-091850 True False 2m16s Cluster version is 4.5.0-0.nightly-2020-07-24-091850
[root@helper ~]# oc get co
NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE
authentication 4.5.0-0.nightly-2020-07-24-091850 True False False 4h5m
cloud-credential 4.5.0-0.nightly-2020-07-24-091850 True False False 4h55m
cluster-autoscaler 4.5.0-0.nightly-2020-07-24-091850 True False False 4h41m
config-operator 4.5.0-0.nightly-2020-07-24-091850 True False False 92m
console 4.5.0-0.nightly-2020-07-24-091850 True False False 19m
csi-snapshot-controller 4.5.0-0.nightly-2020-07-24-091850 True False False 27m
dns 4.5.0-0.nightly-2020-07-24-091850 True False False 4h49m
etcd 4.5.0-0.nightly-2020-07-24-091850 True False False 4h49m
image-registry 4.5.0-0.nightly-2020-07-24-091850 True False False 4h42m
ingress 4.5.0-0.nightly-2020-07-24-091850 True False False 4h10m
insights 4.5.0-0.nightly-2020-07-24-091850 True False False 4h42m
kube-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 4h49m
kube-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 4h49m
kube-scheduler 4.5.0-0.nightly-2020-07-24-091850 True False False 4h49m
kube-storage-version-migrator 4.5.0-0.nightly-2020-07-24-091850 True False False 23m
machine-api 4.5.0-0.nightly-2020-07-24-091850 True False False 4h42m
machine-approver 4.5.0-0.nightly-2020-07-24-091850 True False False 86m
machine-config 4.5.0-0.nightly-2020-07-24-091850 True False False 6m31s
marketplace 4.5.0-0.nightly-2020-07-24-091850 True False False 19m
monitoring 4.5.0-0.nightly-2020-07-24-091850 True False False 41m
network 4.5.0-0.nightly-2020-07-24-091850 True False False 4h51m
node-tuning 4.5.0-0.nightly-2020-07-24-091850 True False False 41m
openshift-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 16m
openshift-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 43m
openshift-samples 4.5.0-0.nightly-2020-07-24-091850 True False False 41m
operator-lifecycle-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 4h50m
operator-lifecycle-manager-catalog 4.5.0-0.nightly-2020-07-24-091850 True False False 4h50m
operator-lifecycle-manager-packageserver 4.5.0-0.nightly-2020-07-24-091850 True False False 11m
service-ca 4.5.0-0.nightly-2020-07-24-091850 True False False 4h51m
storage 4.5.0-0.nightly-2020-07-24-091850 True False False 43m
I tested these two upgrade paths successfully: (1) 4.4.13 with the reproducer -> 4.5.0-0.nightly-2020-07-24-091850; (2) 4.4.13 with the reproducer -> 4.5.2 (which broke with the panic) -> 4.5.0-0.nightly-2020-07-24-091850. Users just need to upgrade to a 4.5 release with the fix to resolve the panic issue. *** Bug 1859781 has been marked as a duplicate of this bug. *** I tested and was able to successfully recover my cluster from an MCO panic by upgrading to the 4.5.4 release.
This cluster was originally built as 4.1.0 and incrementally upgraded to 4.4.12. I then upgraded from 4.4.12 to 4.5.3, which resulted in an MCO panic:
$ oc logs machine-config-operator-b779c6cd5-wwcw5
I0727 18:54:54.436311 1 start.go:46] Version: 4.5.3 (Raw: v4.5.0-202007172106.p0-dirty, Hash: 4173030d89fbf4a7a0976d1665491a4d9a6e54f1)
I0727 18:54:54.444399 1 leaderelection.go:242] attempting to acquire leader lease openshift-machine-config-operator/machine-config...
E0727 18:56:52.313324 1 event.go:316] Could not construct reference to: '&v1.ConfigMap{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"machine-config", GenerateName:"", Namespace:"openshift-machine-config-operator", SelfLink:"/api/v1/namespaces/openshift-machine-config-operator/configmaps/machine-config", UID:"bea443d6-cb93-11ea-9aec-001a4a0ab023", ResourceVersion:"2565970", Generation:0, CreationTimestamp:v1.Time{Time:time.Time{wall:0x0, ext:63730961400, loc:(*time.Location)(0x2530700)}}, DeletionTimestamp:(*v1.Time)(nil), DeletionGracePeriodSeconds:(*int64)(nil), Labels:map[string]string(nil), Annotations:map[string]string{"control-plane.alpha.kubernetes.io/leader":"{\"holderIdentity\":\"machine-config-operator-b779c6cd5-wwcw5_340f155d-2018-417e-9326-e84afbd7a6d6\",\"leaseDurationSeconds\":90,\"acquireTime\":\"2020-07-27T18:56:52Z\",\"renewTime\":\"2020-07-27T18:56:52Z\",\"leaderTransitions\":16}"}, OwnerReferences:[]v1.OwnerReference(nil), Finalizers:[]string(nil), ClusterName:"", ManagedFields:[]v1.ManagedFieldsEntry(nil)}, Immutable:(*bool)(nil), Data:map[string]string(nil), BinaryData:map[string][]uint8(nil)}' due to: 'no kind is registered for the type v1.ConfigMap in scheme "github.com/openshift/machine-config-operator/cmd/common/helpers.go:30"'. Will not report event: 'Normal' 'LeaderElection' 'machine-config-operator-b779c6cd5-wwcw5_340f155d-2018-417e-9326-e84afbd7a6d6 became leader'
I0727 18:56:52.314059 1 leaderelection.go:252] successfully acquired lease openshift-machine-config-operator/machine-config
I0727 18:56:53.307756 1 operator.go:265] Starting MachineConfigOperator
E0727 18:56:53.365535 1 runtime.go:78] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference)
goroutine 238 [running]:
k8s.io/apimachinery/pkg/util/runtime.logPanic(0x1577500, 0x25113c0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:74 +0xa3
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:48 +0x82
panic(0x1577500, 0x25113c0)
/opt/rh/go-toolset-1.13/root/usr/lib/go-toolset-1.13-golang/src/runtime/panic.go:679 +0x1b2
github.com/openshift/machine-config-operator/pkg/operator.isCloudConfigRequired(...)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:105
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncCloudConfig(0xc00017f8c0, 0xc000d36120, 0xc000315860, 0x8, 0xe)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:120 +0x237
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncRenderConfig(0xc00017f8c0, 0x0, 0xc013928263, 0x1c55b43ef492b)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:255 +0x865
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncAll(0xc00017f8c0, 0xc000735c98, 0x6, 0x6, 0xc0006fdc01, 0x413713)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:59 +0x177
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).sync(0xc00017f8c0, 0xc0000d8180, 0x30, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:357 +0x37e
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).processNextWorkItem(0xc00017f8c0, 0xc000686600)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:313 +0x102
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).worker(0xc00017f8c0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:302 +0x2b
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc000c34010)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:155 +0x5e
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc000c34010, 0x19960e0, 0xc00027e0c0, 0xc00000e001, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:156 +0xa3
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc000c34010, 0x3b9aca00, 0x0, 0x1, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133 +0xe2
k8s.io/apimachinery/pkg/util/wait.Until(0xc000c34010, 0x3b9aca00, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:90 +0x4d
created by github.com/openshift/machine-config-operator/pkg/operator.(*Operator).Run
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:271 +0x41f
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x13faff7]
goroutine 238 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:55 +0x105
panic(0x1577500, 0x25113c0)
/opt/rh/go-toolset-1.13/root/usr/lib/go-toolset-1.13-golang/src/runtime/panic.go:679 +0x1b2
github.com/openshift/machine-config-operator/pkg/operator.isCloudConfigRequired(...)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:105
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncCloudConfig(0xc00017f8c0, 0xc000d36120, 0xc000315860, 0x8, 0xe)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:120 +0x237
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncRenderConfig(0xc00017f8c0, 0x0, 0xc013928263, 0x1c55b43ef492b)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:255 +0x865
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).syncAll(0xc00017f8c0, 0xc000735c98, 0x6, 0x6, 0xc0006fdc01, 0x413713)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/sync.go:59 +0x177
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).sync(0xc00017f8c0, 0xc0000d8180, 0x30, 0x0, 0x0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:357 +0x37e
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).processNextWorkItem(0xc00017f8c0, 0xc000686600)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:313 +0x102
github.com/openshift/machine-config-operator/pkg/operator.(*Operator).worker(0xc00017f8c0)
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:302 +0x2b
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc000c34010)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:155 +0x5e
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc000c34010, 0x19960e0, 0xc00027e0c0, 0xc00000e001, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:156 +0xa3
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc000c34010, 0x3b9aca00, 0x0, 0x1, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133 +0xe2
k8s.io/apimachinery/pkg/util/wait.Until(0xc000c34010, 0x3b9aca00, 0xc0000f2480)
/go/src/github.com/openshift/machine-config-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:90 +0x4d
created by github.com/openshift/machine-config-operator/pkg/operator.(*Operator).Run
/go/src/github.com/openshift/machine-config-operator/pkg/operator/operator.go:271 +0x41f
---
I then upgraded from this state (4.4.12/4.5.3 with stuck MCO) to 4.5.4 successfully, using candidate-4.5 via the Web UI to upgrade:
$ oc get co
NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE
authentication 4.5.4 True False False 6d19h
cloud-credential 4.5.4 True False False 6d19h
cluster-autoscaler 4.5.4 True False False 6d19h
config-operator 4.5.4 True False False 22h
console 4.5.4 True False False 28m
csi-snapshot-controller 4.5.4 True False False 35m
dns 4.5.4 True False False 6d19h
etcd 4.5.4 True False False 6d16h
image-registry 4.5.4 True False False 25m
ingress 4.5.4 True False False 6d19h
insights 4.5.4 True False False 6d18h
kube-apiserver 4.5.4 True False False 6d19h
kube-controller-manager 4.5.4 True False False 6d19h
kube-scheduler 4.5.4 True False False 6d16h
kube-storage-version-migrator 4.5.4 True False False 24m
machine-api 4.5.4 True False False 6d19h
machine-approver 4.5.4 True False False 22h
machine-config 4.5.4 True False False 14m
marketplace 4.5.4 True False False 18m
monitoring 4.5.4 True False False 22h
network 4.5.4 True False False 6d19h
node-tuning 4.5.4 True False False 50m
openshift-apiserver 4.5.4 True False False 22h
openshift-controller-manager 4.5.4 True False False 6d19h
openshift-samples 4.5.4 True False False 50m
operator-lifecycle-manager 4.5.4 True False False 6d19h
operator-lifecycle-manager-catalog 4.5.4 True False False 6d19h
operator-lifecycle-manager-packageserver 4.5.4 True False False 17m
service-ca 4.5.4 True False False 6d19h
storage 4.5.4 True False False 51m
$ oc get clusterversion
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
version 4.5.4 True False 12m Cluster version is 4.5.4
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHBA-2020:3028 Removing UpgradeBlocker from this older bug, to remove it from the suspect queue described in [1]. If you feel like this bug still needs to be a suspect, please add the keyword again. [1]: https://github.com/openshift/enhancements/pull/475 |
Verified upgrade from 4.4.13 -> 4.5.0-0.nightly-2020-07-24-091850 using the reproducer of removing `platformStatus.type=None`. [root@helper openshift]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.4.13 True False 16m Cluster version is 4.4.13 [root@helper openshift]# oc get co NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE authentication 4.4.13 True False False 17m cloud-credential 4.4.13 True False False 62m cluster-autoscaler 4.4.13 True False False 37m console 4.4.13 True False False 20m csi-snapshot-controller 4.4.13 True False False 22m dns 4.4.13 True False False 45m etcd 4.4.13 True False False 44m image-registry 4.4.13 True False False 38m ingress 4.4.13 True False False 22m insights 4.4.13 True False False 38m kube-apiserver 4.4.13 True False False 43m kube-controller-manager 4.4.13 True False False 44m kube-scheduler 4.4.13 True False False 43m kube-storage-version-migrator 4.4.13 True False False 22m machine-api 4.4.13 True False False 38m machine-config 4.4.13 True False False 45m marketplace 4.4.13 True False False 37m monitoring 4.4.13 True False False 20m network 4.4.13 True False False 46m node-tuning 4.4.13 True False False 46m openshift-apiserver 4.4.13 True False False 40m openshift-controller-manager 4.4.13 True False False 37m openshift-samples 4.4.13 True False False 36m operator-lifecycle-manager 4.4.13 True False False 45m operator-lifecycle-manager-catalog 4.4.13 True False False 45m operator-lifecycle-manager-packageserver 4.4.13 True False False 40m service-ca 4.4.13 True False False 46m service-catalog-apiserver 4.4.13 True False False 46m service-catalog-controller-manager 4.4.13 True False False 46m storage 4.4.13 True False False 37m [root@helper openshift]# oc get infrastructure -o yaml apiVersion: v1 items: - apiVersion: config.openshift.io/v1 kind: Infrastructure metadata: creationTimestamp: "2020-07-24T12:18:06Z" generation: 1 name: cluster resourceVersion: "430" selfLink: 
/apis/config.openshift.io/v1/infrastructures/cluster uid: 09e21c21-e9ab-4686-880a-7ab31e0ac80f spec: cloudConfig: name: "" status: apiServerInternalURI: https://api-int.ocp4.example.com:6443 apiServerURL: https://api.ocp4.example.com:6443 etcdDiscoveryDomain: ocp4.example.com infrastructureName: ocp4-j52w2 platform: None platformStatus: type: None kind: List metadata: resourceVersion: "" selfLink: "" [root@helper openshift]# oc edit infrastructure infrastructure.config.openshift.io/cluster edited [root@helper openshift]# oc get infrastructure -o yaml apiVersion: v1 items: - apiVersion: config.openshift.io/v1 kind: Infrastructure metadata: creationTimestamp: "2020-07-24T12:18:06Z" generation: 2 name: cluster resourceVersion: "32704" selfLink: /apis/config.openshift.io/v1/infrastructures/cluster uid: 09e21c21-e9ab-4686-880a-7ab31e0ac80f spec: cloudConfig: name: "" status: apiServerInternalURI: https://api-int.ocp4.example.com:6443 apiServerURL: https://api.ocp4.example.com:6443 etcdDiscoveryDomain: ocp4.example.com infrastructureName: ocp4-j52w2 platform: None kind: List metadata: resourceVersion: "" selfLink: "" [root@helper openshift]# oc adm upgrade --force --allow-explicit-upgrade --to-image=registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850 Updating to release image registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850 [root@helper openshift]# watch oc get clusterversion [root@helper openshift]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.4.13 True True 10s Working towards registry.svc.ci.openshift.org/ocp/release:4.5.0-0.nightly-2020-07-24-091850: downloading update [root@helper openshift]# watch oc get clusterversion [root@helper openshift]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.4.13 True True 24s Unable to apply 4.5.0-0.nightly-2020-07-24-091850: the workload openshift-cluster-version/cluster-version-operator has not yet successfully 
rolled out [root@helper openshift]# watch oc get clusterversion [root@helper openshift]# oc get co NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE authentication 4.4.13 True False False 19m cloud-credential 4.4.13 True False False 63m cluster-autoscaler 4.4.13 True False False 38m config-operator console 4.4.13 True False False 21m csi-snapshot-controller 4.4.13 True False False 23m dns 4.4.13 True False False 46m etcd 4.4.13 True False False 45m image-registry 4.4.13 True False False 39m ingress 4.4.13 True False False 24m insights 4.4.13 True False False 39m kube-apiserver 4.4.13 True False False 45m kube-controller-manager 4.4.13 True False False 45m kube-scheduler 4.4.13 True False False 45m kube-storage-version-migrator 4.4.13 True False False 23m machine-api 4.4.13 True False False 39m machine-approver machine-config 4.4.13 True False False 46m marketplace 4.4.13 True False False 39m monitoring 4.4.13 True False False 21m network 4.4.13 True False False 48m node-tuning 4.4.13 True False False 48m openshift-apiserver 4.4.13 True False False 41m openshift-controller-manager 4.4.13 True False False 39m openshift-samples 4.4.13 True False False 38m operator-lifecycle-manager 4.4.13 True False False 47m operator-lifecycle-manager-catalog 4.4.13 True False False 47m operator-lifecycle-manager-packageserver 4.4.13 True False False 42m service-ca 4.4.13 True False False 48m service-catalog-apiserver 4.4.13 True False False 48m service-catalog-controller-manager 4.4.13 True False False 48m storage 4.4.13 True False False 39m [root@helper openshift]# watch oc get clusterversion [root@helper openshift]# oc get co NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE authentication 4.5.0-0.nightly-2020-07-24-091850 True False False 38m cloud-credential 4.5.0-0.nightly-2020-07-24-091850 True False False 82m cluster-autoscaler 4.5.0-0.nightly-2020-07-24-091850 True False False 57m config-operator 4.5.0-0.nightly-2020-07-24-091850 True False False 16m console 
4.5.0-0.nightly-2020-07-24-091850 True False False 8m10s csi-snapshot-controller 4.5.0-0.nightly-2020-07-24-091850 True False False 42m dns 4.5.0-0.nightly-2020-07-24-091850 True True False 65m etcd 4.5.0-0.nightly-2020-07-24-091850 True False False 64m image-registry 4.5.0-0.nightly-2020-07-24-091850 True False False 58m ingress 4.5.0-0.nightly-2020-07-24-091850 True False False 43m insights 4.5.0-0.nightly-2020-07-24-091850 True False False 58m kube-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 64m kube-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 64m kube-scheduler 4.5.0-0.nightly-2020-07-24-091850 True False False 64m kube-storage-version-migrator 4.5.0-0.nightly-2020-07-24-091850 True False False 10m machine-api 4.5.0-0.nightly-2020-07-24-091850 True False False 58m machine-approver 4.5.0-0.nightly-2020-07-24-091850 True False False 11m machine-config 4.4.13 True False False 6m30s marketplace 4.5.0-0.nightly-2020-07-24-091850 True False False 9m13s monitoring 4.5.0-0.nightly-2020-07-24-091850 True False False 7m39s network 4.5.0-0.nightly-2020-07-24-091850 True False False 67m node-tuning 4.5.0-0.nightly-2020-07-24-091850 True False False 10m openshift-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 10m openshift-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 58m openshift-samples 4.5.0-0.nightly-2020-07-24-091850 True False False 9m13s operator-lifecycle-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 66m operator-lifecycle-manager-catalog 4.5.0-0.nightly-2020-07-24-091850 True False False 66m operator-lifecycle-manager-packageserver 4.5.0-0.nightly-2020-07-24-091850 True False False 8m59s service-ca 4.5.0-0.nightly-2020-07-24-091850 True False False 66m service-catalog-apiserver 4.4.13 True False False 67m service-catalog-controller-manager 4.4.13 True False False 67m storage 4.5.0-0.nightly-2020-07-24-091850 True False False 11m [root@helper openshift]# oc -n 
openshift-machine-config-operator get pods NAME READY STATUS RESTARTS AGE etcd-quorum-guard-54896968c-kzxpc 1/1 Running 0 65m etcd-quorum-guard-54896968c-prcl7 1/1 Running 0 65m etcd-quorum-guard-54896968c-xlnz2 1/1 Running 0 65m machine-config-controller-5b89ddfc68-zd8mb 1/1 Running 1 66m machine-config-daemon-68xgq 2/2 Running 0 67m machine-config-daemon-7b2dx 2/2 Running 0 45m machine-config-daemon-j6nz7 2/2 Running 0 45m machine-config-daemon-vlglq 2/2 Running 0 67m machine-config-daemon-vzhb6 2/2 Running 0 67m machine-config-operator-59bbb54b9c-nb7td 1/1 Running 0 64s machine-config-server-llnz2 1/1 Running 0 66m machine-config-server-wpwrv 1/1 Running 0 66m machine-config-server-z76jk 1/1 Running 0 66m [root@helper openshift]# oc -n openshift-machine-config-operator logs -f machine-config-operator-59bbb54b9c-nb7td I0724 13:40:37.239693 1 start.go:46] Version: 4.5.0-0.nightly-2020-07-24-091850 (Raw: v4.5.0-202007240519.p0-dirty, Hash: 99eb744f5094224edb60d88ca85d607ab151ebdf) I0724 13:40:37.244312 1 leaderelection.go:242] attempting to acquire leader lease openshift-machine-config-operator/machine-config... 
^C [root@helper openshift]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.5.0-0.nightly-2020-07-24-091850 True False 2m39s Cluster version is 4.5.0-0.nightly-2020-07-24-091850 [root@helper openshift]# oc get co NAME VERSION AVAILABLE PROGRESSING DEGRADED SINCE authentication 4.5.0-0.nightly-2020-07-24-091850 True False False 65m cloud-credential 4.5.0-0.nightly-2020-07-24-091850 True False False 109m cluster-autoscaler 4.5.0-0.nightly-2020-07-24-091850 True False False 84m config-operator 4.5.0-0.nightly-2020-07-24-091850 True False False 43m console 4.5.0-0.nightly-2020-07-24-091850 True False False 15m csi-snapshot-controller 4.5.0-0.nightly-2020-07-24-091850 True False False 20m dns 4.5.0-0.nightly-2020-07-24-091850 True False False 92m etcd 4.5.0-0.nightly-2020-07-24-091850 True False False 91m image-registry 4.5.0-0.nightly-2020-07-24-091850 True False False 85m ingress 4.5.0-0.nightly-2020-07-24-091850 True False False 69m insights 4.5.0-0.nightly-2020-07-24-091850 True False False 85m kube-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 91m kube-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 91m kube-scheduler 4.5.0-0.nightly-2020-07-24-091850 True False False 91m kube-storage-version-migrator 4.5.0-0.nightly-2020-07-24-091850 True False False 17m machine-api 4.5.0-0.nightly-2020-07-24-091850 True False False 85m machine-approver 4.5.0-0.nightly-2020-07-24-091850 True False False 38m machine-config 4.5.0-0.nightly-2020-07-24-091850 True False False 5m22s marketplace 4.5.0-0.nightly-2020-07-24-091850 True False False 14m monitoring 4.5.0-0.nightly-2020-07-24-091850 True False False 34m network 4.5.0-0.nightly-2020-07-24-091850 True False False 93m node-tuning 4.5.0-0.nightly-2020-07-24-091850 True False False 36m openshift-apiserver 4.5.0-0.nightly-2020-07-24-091850 True False False 7m23s openshift-controller-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 84m openshift-samples 
4.5.0-0.nightly-2020-07-24-091850 True False False 36m operator-lifecycle-manager 4.5.0-0.nightly-2020-07-24-091850 True False False 92m operator-lifecycle-manager-catalog 4.5.0-0.nightly-2020-07-24-091850 True False False 92m operator-lifecycle-manager-packageserver 4.5.0-0.nightly-2020-07-24-091850 True False False 6m58s service-ca 4.5.0-0.nightly-2020-07-24-091850 True False False 93m storage 4.5.0-0.nightly-2020-07-24-091850 True False False 37m