Bug 1479362
| Summary: | "unable to create new content in namespace demo00 because it is being terminated" during drain | ||||||
|---|---|---|---|---|---|---|---|
| Product: | OpenShift Container Platform | Reporter: | Justin Pierce <jupierce> | ||||
| Component: | Node | Assignee: | Derek Carr <decarr> | ||||
| Status: | CLOSED DUPLICATE | QA Contact: | DeShuai Ma <dma> | ||||
| Severity: | urgent | Docs Contact: | |||||
| Priority: | urgent | ||||||
| Version: | 3.6.0 | CC: | aos-bugs, jliggitt, jokerman, jupierce, mmccomas, nraghava, sdodson, sjenning | ||||
| Target Milestone: | --- | ||||||
| Target Release: | --- | ||||||
| Hardware: | Unspecified | ||||||
| OS: | Unspecified | ||||||
| Whiteboard: | |||||||
| Fixed In Version: | Doc Type: | If docs needed, set a value | |||||
| Doc Text: | Story Points: | --- | |||||
| Clone Of: | Environment: | ||||||
| Last Closed: | 2017-08-08 17:50:31 UTC | Type: | Bug | ||||
| Regression: | --- | Mount Type: | --- | ||||
| Documentation: | --- | CRM: | |||||
| Verified Versions: | Category: | --- | |||||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |||||
| Cloudforms Team: | --- | Target Upstream Version: | |||||
| Embargoed: | |||||||
| Attachments: |
|
||||||
|
Description
Justin Pierce
2017-08-08 12:37:45 UTC
I thought this was a spurious condition that could be recovered from by retrying the operation, but it seems worse than that: openshift-ansible exhausted 60 retry attempts trying to recover from this error. The problem recurred, this time in the dakinitest20170618 namespace:
},
"rc": 1,
"retries": 61,
"start": "2017-08-08 16:49:58.313226",
"stderr": "WARNING: replicationcontrollers \"mysql-1\" not found: mysql-1-p3ttk, mysql-1-p3ttk\nWARNING: replicationcontrollers \"mysql-1\" not found: mysql-1-p3ttk, mysql-1-p3ttk\nThere are pending pods when an error occurred: error when evicting pod \"mysql-1-p3ttk\": pods \"mysql-1-p3ttk\" is forbidden: unable to create new content in namespace dakinitest20170618 because it is being terminated.\npod/mysql-1-p3ttk\npod/jws-app-1-hkvqc\nerror: error when evicting pod \"mysql-1-p3ttk\": pods \"mysql-1-p3ttk\" is forbidden: unable to create new content in namespace dakinitest20170618 because it is being terminated.",
"stdout": "node \"ip-172-31-53-187.ec2.internal\" already cordoned",
"stdout_lines": [
"node \"ip-172-31-53-187.ec2.internal\" already cordoned"
],
[root@starter-us-east-1-master-25064 ~]# oc get all -n dakinitest20170618
NAME READY STATUS RESTARTS AGE
po/mysql-1-p3ttk 0/1 Unknown 1 19d
[root@starter-us-east-1-master-25064 ~]# oc get all -n dakinitest20170618 -o yaml
apiVersion: v1
items:
- apiVersion: v1
kind: Pod
metadata:
annotations:
kubernetes.io/created-by: |
{"kind":"SerializedReference","apiVersion":"v1","reference":{"kind":"ReplicationController","namespace":"dakinitest20170618","name":"mysql-1","uid":"e0b97aaf-543b-11e7-835c-12d641ec7610","apiVersion":"v1","resourceVersion":"1067971746"}}
openshift.io/deployment-config.latest-version: "1"
openshift.io/deployment-config.name: mysql
openshift.io/deployment.name: mysql-1
openshift.io/generated-by: OpenShiftNewApp
openshift.io/scc: restricted
creationTimestamp: 2017-07-20T06:37:53Z
deletionGracePeriodSeconds: 30
deletionTimestamp: 2017-08-02T16:48:38Z
generateName: mysql-1-
labels:
app: cakephp-mysql-persistent
deployment: mysql-1
deploymentconfig: mysql
name: mysql
name: mysql-1-p3ttk
namespace: dakinitest20170618
ownerReferences:
- apiVersion: v1
blockOwnerDeletion: true
controller: true
kind: ReplicationController
name: mysql-1
uid: e0b97aaf-543b-11e7-835c-12d641ec7610
resourceVersion: "1258228086"
selfLink: /api/v1/namespaces/dakinitest20170618/pods/mysql-1-p3ttk
uid: f533bbeb-6d15-11e7-803a-122631632f42
spec:
containers:
- env:
- name: MYSQL_USER
valueFrom:
secretKeyRef:
key: database-user
name: cakephp-mysql-persistent
- name: MYSQL_PASSWORD
valueFrom:
secretKeyRef:
key: database-password
name: cakephp-mysql-persistent
- name: MYSQL_DATABASE
value: default
image: registry.access.redhat.com/rhscl/mysql-57-rhel7@sha256:991ef507a4e981531a5601f12ceb65da32605792f1117f15a6001305dd3cfd73
imagePullPolicy: Always
livenessProbe:
failureThreshold: 3
initialDelaySeconds: 30
periodSeconds: 10
successThreshold: 1
tcpSocket:
port: 3306
timeoutSeconds: 1
name: mysql
ports:
- containerPort: 3306
protocol: TCP
readinessProbe:
exec:
command:
- /bin/sh
- -i
- -c
- MYSQL_PWD='s48UeOLoL1JQtT3T' mysql -h 127.0.0.1 -u cakephp -D default
-e 'SELECT 1'
failureThreshold: 3
initialDelaySeconds: 5
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: "1"
memory: 512Mi
requests:
cpu: 60m
memory: 307Mi
securityContext:
capabilities:
drop:
- KILL
- MKNOD
- NET_RAW
- SETGID
- SETUID
- SYS_CHROOT
privileged: false
runAsUser: 1124180000
seLinuxOptions:
level: s0:c352,c314
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/lib/mysql/data
name: mysql-data
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: default-token-tx31j
readOnly: true
dnsPolicy: ClusterFirst
imagePullSecrets:
- name: default-dockercfg-ckvl7
nodeName: ip-172-31-53-187.ec2.internal
nodeSelector:
type: compute
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
fsGroup: 1124180000
seLinuxOptions:
level: s0:c352,c314
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
volumes:
- name: mysql-data
persistentVolumeClaim:
claimName: mysql
- name: default-token-tx31j
secret:
defaultMode: 420
secretName: default-token-tx31j
status:
conditions:
- lastProbeTime: null
lastTransitionTime: 2017-07-20T06:37:53Z
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: 2017-07-27T16:50:18Z
message: 'containers with unready status: [mysql]'
reason: ContainersNotReady
status: "False"
type: Ready
- lastProbeTime: null
lastTransitionTime: 2017-07-20T06:37:53Z
status: "True"
type: PodScheduled
containerStatuses:
- containerID: docker://e98609ceb53f32685938b1d5ce13586e9dcfea5220321916eb5b523fa577e967
image: registry.access.redhat.com/rhscl/mysql-57-rhel7@sha256:991ef507a4e981531a5601f12ceb65da32605792f1117f15a6001305dd3cfd73
imageID: docker-pullable://registry.access.redhat.com/rhscl/mysql-57-rhel7@sha256:991ef507a4e981531a5601f12ceb65da32605792f1117f15a6001305dd3cfd73
lastState:
terminated:
containerID: docker://3439f952bde86637ef420a02dae289707bd0a827a4ba7376b6b60855aeb37063
exitCode: 0
finishedAt: 2017-07-21T23:29:02Z
reason: Completed
startedAt: 2017-07-20T06:40:30Z
name: mysql
ready: false
restartCount: 1
state:
running:
startedAt: 2017-07-21T23:35:32Z
hostIP: 172.31.53.187
phase: Running
qosClass: Burstable
startTime: 2017-07-20T06:37:53Z
kind: List
metadata: {}
resourceVersion: ""
selfLink: ""
[root@starter-us-east-1-master-25064 ~]#
Created attachment 1310760 [details]
metrics output
This looks like https://bugzilla.redhat.com/show_bug.cgi?id=1460729, based on namespace termination hanging because of stuck pods, which are caused by a docker state mismatch with containerd that resolves after a few hours. Indicators include "containerd: container not found" in the node log. *** This bug has been marked as a duplicate of bug 1460729 *** |