+++ This bug was initially created as a clone of Bug #1894677 +++

Description of problem:

After an update to 4.6.1 (from 4.5.16), the image pruner job has reached the specified backoff limit. After looking into the pod log, we saw this panic:

k8s.io/klog/v2.(*loggingT).output(0x4d2aec0, 0xc000000003, 0x0, 0x0, 0xc000a64a10, 0x48a9af4, 0xa, 0x73, 0x41d400)
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/v2/klog.go:945 +0x191
k8s.io/klog/v2.(*loggingT).printDepth(0x4d2aec0, 0x3, 0x0, 0x0, 0x2, 0xc00259d738, 0x1, 0x1)
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/v2/klog.go:718 +0x165
k8s.io/klog/v2.FatalDepth(...)
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/v2/klog.go:1449
k8s.io/kubectl/pkg/cmd/util.fatal(0xc0007840c0, 0xbb, 0x1)
    /go/src/github.com/openshift/oc/vendor/k8s.io/kubectl/pkg/cmd/util/helpers.go:93 +0x1f0
k8s.io/kubectl/pkg/cmd/util.checkErr(0x347a520, 0xc001130f20, 0x31c65e8)
    /go/src/github.com/openshift/oc/vendor/k8s.io/kubectl/pkg/cmd/util/helpers.go:188 +0x945
k8s.io/kubectl/pkg/cmd/util.CheckErr(...)
    /go/src/github.com/openshift/oc/vendor/k8s.io/kubectl/pkg/cmd/util/helpers.go:115
github.com/openshift/oc/pkg/cli/admin/prune/images.NewCmdPruneImages.func1(0xc00164b340, 0xc0013de9a0, 0x0, 0x7)
    /go/src/github.com/openshift/oc/pkg/cli/admin/prune/images/images.go:161 +0x1c5
github.com/spf13/cobra.(*Command).execute(0xc00164b340, 0xc0013de930, 0x7, 0x7, 0xc00164b340, 0xc0013de930)
    /go/src/github.com/openshift/oc/vendor/github.com/spf13/cobra/command.go:846 +0x2c2
github.com/spf13/cobra.(*Command).ExecuteC(0xc000227080, 0x2, 0xc000227080, 0x2)
    /go/src/github.com/openshift/oc/vendor/github.com/spf13/cobra/command.go:950 +0x375
github.com/spf13/cobra.(*Command).Execute(...)
    /go/src/github.com/openshift/oc/vendor/github.com/spf13/cobra/command.go:887
main.main()
    /go/src/github.com/openshift/oc/cmd/oc/oc.go:110 +0x885

goroutine 19 [chan receive]:
k8s.io/klog/v2.(*loggingT).flushDaemon(0x4d2aec0)
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/v2/klog.go:1131 +0x8b
created by k8s.io/klog/v2.init.0
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/v2/klog.go:416 +0xd8

goroutine 5 [chan receive]:
k8s.io/klog.(*loggingT).flushDaemon(0x4d2ade0)
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/klog.go:1010 +0x8b
created by k8s.io/klog.init.0
    /go/src/github.com/openshift/oc/vendor/k8s.io/klog/klog.go:411 +0xd8

goroutine 10 [select]:
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0x31c6510, 0x347e280, 0xc00133d7a0, 0x1, 0xc000102360)
    /go/src/github.com/openshift/oc/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:167 +0x149
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0x31c6510, 0x12a05f200, 0x0, 0xc000347001, 0xc000102360)
    /go/src/github.com/openshift/oc/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133 +0x98
k8s.io/apimachinery/pkg/util/wait.Until(...)
    /go/src/github.com/openshift/oc/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:90
k8s.io/apimachinery/pkg/util/wait.Forever(0x31c6510, 0x12a05f200)
    /go/src/github.com/openshift/oc/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:81 +0x4f
created by k8s.io/component-base/logs.InitLogs
    /go/src/github.com/openshift/oc/vendor/k8s.io/component-base/logs/logs.go:58 +0x8a
goroutine 28 [select]:
io.(*pipe).Read(0xc001562d80, 0xc00164c000, 0x1000, 0x1000, 0x2aa4860, 0x2, 0x2)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/io/pipe.go:57 +0xe7
io.(*PipeReader).Read(0xc0002fda90, 0xc00164c000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/io/pipe.go:134 +0x4c
bufio.(*Scanner).Scan(0xc00156da00, 0x0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/bufio/scan.go:214 +0xa9
github.com/openshift/oc/pkg/cli/admin/mustgather.newPrefixWriter.func1(0xc00156da00, 0x347fca0, 0xc00012c008, 0x2fdfe6d, 0x17)
    /go/src/github.com/openshift/oc/pkg/cli/admin/mustgather/mustgather.go:424 +0x13e
created by github.com/openshift/oc/pkg/cli/admin/mustgather.newPrefixWriter
    /go/src/github.com/openshift/oc/pkg/cli/admin/mustgather/mustgather.go:423 +0x1d0

goroutine 32 [IO wait]:
internal/poll.runtime_pollWait(0x7f606a11a7d8, 0x72, 0x3482f20)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/runtime/netpoll.go:220 +0x55
internal/poll.(*pollDesc).wait(0xc0016d6498, 0x72, 0xc001f1a000, 0x11d2a, 0x11d2a)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc0016d6480, 0xc001f1a000, 0x11d2a, 0x11d2a, 0x0, 0x0, 0x0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/internal/poll/fd_unix.go:159 +0x1b1
net.(*netFD).Read(0xc0016d6480, 0xc001f1a000, 0x11d2a, 0x11d2a, 0x203000, 0x78699b, 0xc00068abe0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc0002fc000, 0xc001f1a000, 0x11d2a, 0x11d2a, 0x0, 0x0, 0x0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/net/net.go:182 +0x8e
crypto/tls.(*atLeastReader).Read(0xc0024ac220, 0xc001f1a000, 0x11d2a, 0x11d2a, 0x379, 0x11d25, 0xc000e83710)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/crypto/tls/conn.go:779 +0x62
bytes.(*Buffer).ReadFrom(0xc00068ad00, 0x347a260, 0xc0024ac220, 0x41d785, 0x2bdcba0, 0x2ed2dc0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/bytes/buffer.go:204 +0xb1
crypto/tls.(*Conn).readFromUntil(0xc00068aa80, 0x347f7c0, 0xc0002fc000, 0x5, 0xc0002fc000, 0x368)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/crypto/tls/conn.go:801 +0xf3
crypto/tls.(*Conn).readRecordOrCCS(0xc00068aa80, 0x0, 0x0, 0xc000e83d18)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/crypto/tls/conn.go:608 +0x115
crypto/tls.(*Conn).readRecord(...)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/crypto/tls/conn.go:576
crypto/tls.(*Conn).Read(0xc00068aa80, 0xc000c6e000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/crypto/tls/conn.go:1252 +0x15f
bufio.(*Reader).Read(0xc0003c18c0, 0xc0005e57d8, 0x9, 0x9, 0xc000e83d18, 0x31c9f00, 0x9bbceb)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/bufio/bufio.go:227 +0x222
io.ReadAtLeast(0x347a000, 0xc0003c18c0, 0xc0005e57d8, 0x9, 0x9, 0x9, 0xc000112050, 0x0, 0x347a520)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/io/io.go:314 +0x87
io.ReadFull(...)
    /opt/rh/go-toolset-1.15/root/usr/lib/go-toolset-1.15-golang/src/io/io.go:333
golang.org/x/net/http2.readFrameHeader(0xc0005e57d8, 0x9, 0x9, 0x347a000, 0xc0003c18c0, 0x0, 0x0, 0xc002514300, 0x0)
    /go/src/github.com/openshift/oc/vendor/golang.org/x/net/http2/frame.go:237 +0x89
golang.org/x/net/http2.(*Framer).ReadFrame(0xc0005e57a0, 0xc002514300, 0x0, 0x0, 0x0)
    /go/src/github.com/openshift/oc/vendor/golang.org/x/net/http2/frame.go:492 +0xa5
golang.org/x/net/http2.(*clientConnReadLoop).run(0xc000e83fa8, 0x0, 0x0)
    /go/src/github.com/openshift/oc/vendor/golang.org/x/net/http2/transport.go:1794 +0xd8
golang.org/x/net/http2.(*ClientConn).readLoop(0xc0013cd080)
    /go/src/github.com/openshift/oc/vendor/golang.org/x/net/http2/transport.go:1716 +0x6f
created by golang.org/x/net/http2.(*Transport).newClientConn
    /go/src/github.com/openshift/oc/vendor/golang.org/x/net/http2/transport.go:695 +0x66e
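Reading the top frames: this is kubectl's generic error handling, not a Go runtime panic. `CheckErr` routes any non-nil error into `fatal`, which reaches `klog.FatalDepth`, and klog appends the stacks of all goroutines to fatal-severity messages before exiting, so the pod log shows a panic-like goroutine dump even if no `panic:` line was ever printed. Below is a minimal sketch of that shape, with simplified signatures; the real helpers live in k8s.io/kubectl/pkg/cmd/util/helpers.go and differ in detail.

```go
// Minimal sketch of the error path visible in the trace above. Signatures
// are simplified; this is not the actual oc/kubectl source.
package main

import (
	"errors"
	"fmt"
	"os"

	"k8s.io/klog/v2"
)

// checkErr mirrors the shape of util.checkErr: a nil error is a no-op,
// anything else is handed to the fatal handler with an exit code.
func checkErr(err error, handleErr func(string, int)) {
	if err != nil {
		handleErr(err.Error(), 1)
	}
}

// fatal mirrors the shape of util.fatal: at high verbosity it goes through
// klog.FatalDepth, and klog appends all goroutine stacks to fatal-severity
// messages before exiting -- hence a panic-like dump with no "panic:" line.
func fatal(msg string, code int) {
	if klog.V(99).Enabled() {
		klog.FatalDepth(2, msg)
	}
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(code)
}

func main() {
	// Any error returned by the prune operation would surface here.
	checkErr(errors.New("example: failed to prune images"), fatal)
}
```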
Version-Release number of selected component (if applicable):
4.6.1

How reproducible:

Steps to Reproduce:
We came across this cluster in datahub/telemetry.

Actual results:
The image pruner job panics.

Expected results:
The job should not panic.

Additional info:

From cluster version history:

    {
      "state": "Completed",
      "startedTime": "2020-10-29T19:04:01Z",
      "completionTime": "2020-11-02T14:45:48Z",
      "version": "4.6.1",
      "image": "quay.io/openshift-release-dev/ocp-release@sha256:d78292e9730dd387ff6198197c8b0598da340be7678e8e1e4810b557a926c2b9",
      "verified": false
    },
    {
      "state": "Completed",
      "startedTime": "2020-10-28T19:48:07Z",
      "completionTime": "2020-10-28T20:24:07Z",
      "version": "4.5.16",
      "image": "quay.io/openshift-release-dev/ocp-release@sha256:adb5ef06c54ff75ca9033f222ac5e57f2fd82e49bdd84f737d460ae542c8af60",
      "verified": false
    },

From clusterversion status conditions:

    {
      "type": "Failing",
      "status": "True",
      "lastTransitionTime": "2020-11-03T22:02:18Z",
      "reason": "ClusterOperatorDegraded",
      "message": "Cluster operator image-registry is reporting a failure: ImagePrunerDegraded: Job has reached the specified backoff limit"
    },
    {
      "type": "Progressing",
      "status": "False",
      "lastTransitionTime": "2020-11-02T14:45:48Z",
      "reason": "ClusterOperatorDegraded",
      "message": "Error while reconciling 4.6.1: the cluster operator image-registry is degraded"
    }

config/clusteroperator/image-registry.json:

    operator   4.6.1   2020-11-04T00:00:39Z   Degraded   True   ImagePrunerDegraded: Job has reached the specified backoff limit

--- Additional comment from obulatov on 2020-11-04 19:46:16 UTC ---

Can you find in logs the line that starts with `panic: `?

--- Additional comment from wking on 2020-11-05 17:57:41 UTC ---

> Can you find in logs the line that starts with `panic: `?

This is not a cluster we have direct access to. The panic stack is from a tarball uploaded from a customer cluster via Insights [1]. So the portion of the stack you see is all we have. I guess we could look in CI to see if we can find similar panics?
Or get what we can from the stack-fragment to try to figure out a reproducer? Or get CCX to search their logs to see if we can find this still occurring somewhere where we can get the full logs (maybe in an OSD cluster?) or in a customer cluster where we can ask the customer for full logs. Or we can make the insights-operator smart enough to see panics and focus on the crashing goroutine. Lots of options, but nothing super easy.

[1]: https://github.com/openshift/insights-operator/
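To make that last option concrete: a first-cut detector (purely illustrative; `extractCrashingGoroutine` is an invented name, not insights-operator code) could look for a `panic: ` line in a gathered pod log and keep only the first goroutine block after it, since the Go runtime prints the crashing goroutine first. A sketch:

```go
// Hypothetical sketch of the "detect the crashing goroutine" idea; nothing
// like this exists in the insights-operator, and the heuristics are
// illustrative only.
package main

import (
	"fmt"
	"strings"
)

// extractCrashingGoroutine returns the "panic:" line plus the first
// goroutine block after it. For a Go runtime panic, the first goroutine
// printed is the one that crashed. It returns false when no "panic:" line
// exists in the log.
func extractCrashingGoroutine(log string) (string, bool) {
	start := strings.Index(log, "panic: ")
	if start < 0 {
		return "", false
	}
	rest := log[start:]
	g := strings.Index(rest, "goroutine ")
	if g < 0 {
		return rest, true // panic line with no stack attached
	}
	// A blank line terminates the crashing goroutine's stack.
	if end := strings.Index(rest[g:], "\n\n"); end >= 0 {
		return rest[:g+end], true
	}
	return rest, true
}

func main() {
	sample := "panic: runtime error: index out of range\n\n" +
		"goroutine 1 [running]:\nmain.main()\n\t/src/main.go:10 +0x20\n\n" +
		"goroutine 2 [chan receive]:\n..."
	if frag, ok := extractCrashingGoroutine(sample); ok {
		fmt.Println(frag)
	}
}
```

Note that this heuristic would report nothing for a klog fatal dump like the fragment above, where `FatalDepth` sits at the top and no `panic:` line is visible; that case would need a second heuristic.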
Verified on 4.6.0-0.nightly-2020-11-22-160856, which is an upgrade from 4.5.20.
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (OpenShift Container Platform 4.6.6 bug fix update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report.

https://access.redhat.com/errata/RHBA-2020:5115