Because a node is in an abnormal state, we need to:
- Taint the abnormal node so that no new pods are scheduled onto it
- Evict the pods (if any) still running on the abnormal node
- Carry out maintenance on the node (software upgrades, reboot, etc.)
- Remove the taint from the node once maintenance is finished
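Before tainting, it is worth confirming what is already on the node. A minimal check, assuming a placeholder node name node1:
# Show existing taints and the raw taint spec of the node (node1 is a placeholder)
kubectl describe node node1 | grep -i taints
kubectl get node node1 -o jsonpath='{.spec.taints}'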
# Service downtime = 5-minute eviction wait (the default pod eviction timeout for a NotReady node) + pod rebuild time + service startup time + time for the readiness probe to pass
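As a rough illustration, assuming a 30 s pod rebuild, a 20 s service startup and a readiness probe that passes after another 10 s, the downtime is about 300 s + 30 s + 20 s + 10 s ≈ 6 minutes; the 5-minute eviction wait dominates, which is why proactively tainting and draining the node (as below) beats waiting for automatic eviction.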
# Cluster health check
kubectl get cs
kubectl get node -owide
kubectl get pod -A | grep -v Running
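It can also help to list only the pods sitting on the abnormal node; the field selector below works with any recent kubectl (node1 is a placeholder name):
# Pods scheduled on the abnormal node
kubectl get pod -A -o wide --field-selector spec.nodeName=node1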
# Taint command reference
kubectl taint nodes node1 key1=value1:NoSchedule   # taint node1: new pods without a matching toleration will not be scheduled onto it
kubectl taint nodes node1 key1=value1:NoSchedule-  # remove the taint so node1 can be scheduled onto again
# NoSchedule: pods are never scheduled onto the node (unless they tolerate the taint)
# PreferNoSchedule: the scheduler tries to avoid the node but may still place pods on it
# NoExecute: new pods are not scheduled, and pods already running on the node are evicted unless they tolerate the taint
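For reference, a pod can still land on a tainted node if it declares a matching toleration. A minimal sketch, assuming the key1=value1:NoSchedule taint above; the pod name and image are illustrative only:
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: toleration-demo      # hypothetical pod, for illustration only
spec:
  tolerations:
  - key: "key1"              # must match the taint key
    operator: "Equal"
    value: "value1"          # must match the taint value
    effect: "NoSchedule"     # must match the taint effect
  containers:
  - name: app
    image: nginx
EOF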
# Mark the node as unschedulable; existing pods keep running, and DaemonSet pods are not affected
kubectl cordon node-name
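After cordoning, the node should report itself as unschedulable:
# Verify the cordon took effect; STATUS should read Ready,SchedulingDisabled
kubectl get node node-name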
# Evict the pods running on the node
kubectl drain node-name
# Evict the pods; drain first cordons the node (marks it unschedulable)
kubectl drain node-name --ignore-daemonsets
--delete-local-data    # delete local data, i.e. wipe emptyDir volumes (renamed to --delete-emptydir-data in newer kubectl)
--ignore-daemonsets    # ignore DaemonSet-managed pods, which cannot be evicted
--force                # without --force, drain refuses to proceed if the node has pods that are not managed by a ReplicationController, ReplicaSet, DaemonSet, StatefulSet or Job; with it, those pods are deleted as well
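On newer kubectl versions, drain also accepts a dry-run flag, which is handy for previewing the eviction before touching the node (same placeholder node name as above):
# Preview which pods would be evicted, without changing anything
kubectl drain node-name --ignore-daemonsets --delete-emptydir-data --dry-run=client
# After the real drain, only DaemonSet pods should remain on the node
kubectl get pod -A -o wide --field-selector spec.nodeName=node-name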
# Maintenance finished; put the node back into service
kubectl uncordon node-name
# Delete the node
kubectl delete node node-name
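Deleting the node only removes its API object. If the machine is rebuilt and should rejoin the cluster, a sketch assuming a kubeadm-managed cluster:
# On a control-plane node, print a fresh join command (kubeadm clusters only)
kubeadm token create --print-join-command
# Run the printed 'kubeadm join ...' on the rebuilt worker (run 'kubeadm reset' there first if it was joined before), then confirm it registers
kubectl get node -owide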
# Maintenance commands for hq-t-k8s-worker03
kubectl taint nodes hq-t-k8s-worker03 key1=value1:NoSchedule
kubectl cordon hq-t-k8s-worker03
kubectl drain hq-t-k8s-worker03 --ignore-daemonsets
# Recovery commands for hq-t-k8s-worker03
kubectl taint nodes hq-t-k8s-worker03 key1=value1:NoSchedule-
kubectl uncordon hq-t-k8s-worker03
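A quick sanity check after recovering worker03 confirms the taint is gone and the node is schedulable again:
# Taints should show <none>, STATUS should be plain Ready
kubectl describe node hq-t-k8s-worker03 | grep -i taints
kubectl get node hq-t-k8s-worker03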
# Maintenance commands for hq-t-k8s-worker01 and hq-t-k8s-worker02
kubectl taint nodes hq-t-k8s-worker02 key1=value1:NoSchedule
kubectl cordon hq-t-k8s-worker01
kubectl drain hq-t-k8s-worker02 --ignore-daemonsets
kubectl drain hq-t-k8s-worker01 --ignore-daemonsets --delete-emptydir-data
# Recovery commands for hq-t-k8s-worker01 and hq-t-k8s-worker02
kubectl taint nodes hq-t-k8s-worker02 key1=value1:NoSchedule-   # the taint was set on worker02 above
kubectl uncordon hq-t-k8s-worker01                              # both drained nodes need to be uncordoned
kubectl uncordon hq-t-k8s-worker02
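Note that uncordoning does not move existing pods back; the recovered nodes only receive newly scheduled pods. To confirm they accept pods again, and optionally rebalance by restarting the affected workloads (the deployment name below is a placeholder):
kubectl get pod -A -o wide --field-selector spec.nodeName=hq-t-k8s-worker02
kubectl rollout restart deployment my-app   # 'my-app' is a hypothetical workload name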