Installing a Highly Available Kubernetes Cluster with Yum
Contents
1. Cluster architecture
2. System initialization
3. Install the HA components
4. Install the master nodes
5. Install the worker node
6. Install the flannel network add-on
7. Basic kubectl operations
8. Node management
9. Adding and removing taints
1. Cluster architecture
VIP:192.168.1.100
master1:192.168.1.101
master2:192.168.1.102
master3:192.168.1.103
node1:192.168.1.104
The etcd cluster and the master components in this document both run on the three master machines listed above.
2. System initialization
Set a permanent hostname on each machine, then log in again:
[root@master1 ~]# hostnamectl set-hostname master1
[root@master2 ~]# hostnamectl set-hostname master2
[root@master3 ~]# hostnamectl set-hostname master3
[root@node1 ~]# hostnamectl set-hostname node1
Edit the /etc/hosts file and add the hostname-to-IP mappings:
cat > /etc/hosts << EOF
192.168.1.101 master1
192.168.1.102 master2
192.168.1.103 master3
192.168.1.104 node1
EOF
Install the dependency packages on every machine:
yum install -y conntrack ipvsadm ipset jq sysstat curl iptables libseccomp
Note: ipvs depends on ipset.
Disable the firewall on every machine:
① Stop the service and disable it at boot
systemctl stop firewalld && systemctl disable firewalld
② Flush the firewall rules
iptables -F && iptables -X && iptables -F -t nat && iptables -X -t nat
iptables -P FORWARD ACCEPT
Turn off swap. If swap is enabled, kubelet will fail to start (this can be bypassed by setting the kubelet flag --fail-swap-on to false), so disable the swap partition on every machine:
swapoff -a && sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
Disable SELinux, otherwise Kubernetes may later report Permission denied when mounting directories:
setenforce 0 && sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
Load the required kernel modules:
modprobe br_netfilter
modprobe ip_vs
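To make these modules load automatically after a reboot, an optional extra step (the file name k8s.conf is arbitrary) is to declare them for systemd-modules-load:
cat > /etc/modules-load.d/k8s.conf << EOF
br_netfilter
ip_vs
EOF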
Set the kernel parameters:
cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
net.netfilter.nf_conntrack_max=2310720
EOF
Apply the configuration:
sysctl -p /etc/sysctl.d/kubernetes.conf
mount -t cgroup -o cpu,cpuacct none /sys/fs/cgroup/cpu,cpuacct
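As a quick optional check that the settings are active, a couple of the keys can be queried directly:
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward
# expected:
# net.bridge.bridge-nf-call-iptables = 1
# net.ipv4.ip_forward = 1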
Set the system time zone:
1. Adjust the system TimeZone
timedatectl set-timezone Asia/Shanghai
2. Write the current UTC time to the hardware clock
timedatectl set-local-rtc 0
3. Restart the services that depend on the system time
systemctl restart rsyslog && systemctl restart crond
Synchronize the system time:
yum -y install ntpdate
ntpdate cn.pool.ntp.org
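ntpdate only syncs the clock once. To keep the clocks in sync, one simple option (a sketch; the 30-minute schedule is arbitrary) is a cron entry under /etc/cron.d:
echo '*/30 * * * * root /usr/sbin/ntpdate cn.pool.ntp.org >/dev/null 2>&1' > /etc/cron.d/ntpdate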
3. Install the HA components
Install keepalived and haproxy on all three master nodes:
[root@master1 ~]# yum install -y keepalived haproxy
Configure the haproxy service
The haproxy configuration is identical on all three master nodes:
[root@master1 ~]# vim /etc/haproxy/haproxy.cfg
global
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    stats socket /var/run/haproxy-admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    daemon
    nbproc 1

defaults
    log global
    timeout connect 5000
    timeout client 10m
    timeout server 10m

listen admin_stats
    bind 0.0.0.0:10080
    mode http
    log 127.0.0.1 local0 err
    stats refresh 30s
    stats uri /status
    stats realm welcome login\ Haproxy
    stats auth k8s:k8sadmin
    stats hide-version
    stats admin if TRUE

listen k8s-master1
    bind 0.0.0.0:6444
    mode tcp
    option tcplog
    balance source
    server 192.168.1.101 192.168.1.101:6443 check inter 2000 fall 2 rise 2 weight 1
    server 192.168.1.102 192.168.1.102:6443 check inter 2000 fall 2 rise 2 weight 1
    server 192.168.1.103 192.168.1.103:6443 check inter 2000 fall 2 rise 2 weight 1
Start haproxy:
systemctl enable haproxy && systemctl start haproxy
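Before moving on, it may be worth confirming that haproxy is listening on the frontend and stats ports configured above, for example:
ss -lntp | grep -E '6444|10080'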
Configure keepalived. Things to note:
1. The state on master1 is MASTER; on master2 and master3 it is BACKUP.
2. The priority on master1 is 100 and decreases on the other masters, e.g. 90 on master2 and 80 on master3.
master1:
global_defs {
    router_id keepalived_hap
}
vrrp_script check-haproxy {
    script "killall -0 haproxy"
    interval 5
    weight -30
}
vrrp_instance VIP {
    state MASTER
    priority 100
    dont_track_primary
    interface ens33
    virtual_router_id 68
    advert_int 3
    track_script {
        check-haproxy
    }
    virtual_ipaddress {
        192.168.1.100
    }
}
master2:
global_defs {
    router_id keepalived_hap
}
vrrp_script check-haproxy {
    script "killall -0 haproxy"
    interval 5
    weight -30
}
vrrp_instance VIP {
    state BACKUP
    priority 90
    dont_track_primary
    interface ens33
    virtual_router_id 68
    advert_int 3
    track_script {
        check-haproxy
    }
    virtual_ipaddress {
        192.168.1.100
    }
}
master3:
global_defs {
    router_id keepalived_hap
}
vrrp_script check-haproxy {
    script "killall -0 haproxy"
    interval 5
    weight -30
}
vrrp_instance VIP {
    state BACKUP
    priority 80
    dont_track_primary
    interface ens33
    virtual_router_id 68
    advert_int 3
    track_script {
        check-haproxy
    }
    virtual_ipaddress {
        192.168.1.100
    }
}
Create the HAproxy check script on all master nodes (note: the keepalived configuration above already checks haproxy directly with "killall -0 haproxy"; to use this script instead, the vrrp_script block would need to reference it). Quoting the EOF delimiter keeps the command substitutions from being expanded when the file is written:
cat > /etc/keepalived/check_haproxy.sh << 'EOF'
#!/bin/bash
if [ $(ps -C haproxy --no-header | wc -l) -eq 0 ]; then
    systemctl start haproxy
    sleep 3
    if [ $(ps -C haproxy --no-header | wc -l) -eq 0 ]; then
        systemctl stop keepalived
    fi
fi
EOF
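If keepalived is to call this script, it also needs to be executable:
chmod +x /etc/keepalived/check_haproxy.sh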
Start keepalived:
systemctl enable keepalived && systemctl start keepalived
Check that master1 now carries the extra VIP 192.168.1.100:
[root@master1 ~]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:4d:22:b1 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.101/24 brd 192.168.1.255 scope global noprefixroute ens33
valid_lft forever preferred_lft forever
inet 192.168.1.100/32 scope global ens33
valid_lft forever preferred_lft forever
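As an optional failover test: stopping keepalived on master1 should move the VIP to master2 within a few advertisement intervals, and starting it again brings the VIP back because master1 has the highest priority:
[root@master1 ~]# systemctl stop keepalived
[root@master2 ~]# ip addr show ens33 | grep 192.168.1.100
[root@master1 ~]# systemctl start keepalived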
Next, install Kubernetes and Docker.
4. Install the master nodes
Configure the yum repositories (on all master nodes):
Kubernetes yum repository:
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
docker-ce yum repository:
cd /etc/yum.repos.d/
wget https://download.docker.com/linux/centos/docker-ce.repo
Install kubectl, kubeadm and kubelet; a specific version can be specified:
yum install -y kubeadm-1.16.6 kubectl-1.16.6 kubelet-1.16.6
Enable kubelet to start at boot:
systemctl enable kubelet
Install docker-ce; a specific version can be chosen here as well:
Step 1: List the available Docker-CE versions:
[root@master1 ~]# yum list docker-ce.x86_64 --showduplicates | sort -r
......
docker-ce.x86_64 18.06.3.ce-3.el7 docker-ce-stable
docker-ce.x86_64 18.06.1.ce-3.el7 docker-ce-stable
docker-ce.x86_64 18.06.0.ce-3.el7 docker-ce-stable
......
Step 2: Install the chosen Docker-CE version (replace <VERSION> with one of the versions listed above, e.g. 18.06.3.ce-3.el7):
[root@master1 ~]# yum -y install docker-ce-<VERSION>
Configure a registry mirror and start docker-ce:
[root@master1 ~]# mkdir /etc/docker
[root@master1 ~]# cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors": ["https://z11csm7d.mirror.aliyuncs.com"]
}
EOF
[root@master1 ~]# systemctl enable docker && systemctl start docker
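To confirm that docker is up and the registry mirror is being used, the daemon info can be checked, for example:
[root@master1 ~]# docker info | grep -A1 'Registry Mirrors'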
Initialize the cluster
Edit the kubeadm initialization configuration file:
[root@master1 ~]# cat kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  # advertiseAddress is the IP address of the current node
  advertiseAddress: 192.168.1.101
  # bindPort is the port the API server listens on; it must match the backend port used in the haproxy configuration (6443)
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: master1
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
# controlPlaneEndpoint is added here: the HA VIP and port. This port must not collide with bindPort above; the parameter is required for an HA cluster.
controlPlaneEndpoint: 192.168.1.100:6444
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
# The default image repository is Google's, which is unreachable from mainland China, so it is changed to the Aliyun mirror
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
# kubernetesVersion must be set to the kubeadm version actually installed
kubernetesVersion: v1.16.6
networking:
  dnsDomain: cluster.local
  # podSubnet defines the IP range used by pods (it must match the flannel network)
  podSubnet: "10.244.0.0/16"
  serviceSubnet: 10.96.0.0/12
scheduler: {}
# The section below is added to enable the IPVS proxy mode
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
  SupportIPVSProxyMode: true
mode: ipvs
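Optionally, the required images can be pre-pulled on each master with the same configuration file before initializing; this speeds up kubeadm init and surfaces registry problems early:
kubeadm config images pull --config kubeadm-config.yaml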
Run the initialization command on any one of the master nodes (by convention we run it on master1):
kubeadm init --config=kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
After roughly three minutes the result appears:
[init] Using Kubernetes version: v1.16.0
......
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2 \
--control-plane --certificate-key 3855cc2e945048b10ac9b1b96427c84686fcfd3692ef4a7d30d875da9790ea5d
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2
Output like the above means the initialization succeeded.
Let's break down that output:
1. As the init command shows, this log is saved as kubeadm-init.log in the directory where the command was run.
2. After initialization, first run the following so that kubectl works:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
3. Run the following command to add more control-plane (master) nodes:
kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2 \
--control-plane --certificate-key 3855cc2e945048b10ac9b1b96427c84686fcfd3692ef4a7d30d875da9790ea5d
4. Run the following command to add worker nodes:
kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2
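Note that both join commands embed credentials with limited lifetimes (the token lasts 24 hours and the uploaded certificates 2 hours, as the log states). If a node is joined later, they can be regenerated, for example:
# print a fresh worker join command
kubeadm token create --print-join-command
# re-upload the control-plane certificates and print a new certificate key
kubeadm init phase upload-certs --upload-certs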
The cluster is now initialized and has one master node:
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady master 12m v1.16.6
Next, run the control-plane join command above on master2 and master3 to turn them into master nodes as well:
master2:
[root@master2 ~]# kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2 \
--control-plane --certificate-key 3855cc2e945048b10ac9b1b96427c84686fcfd3692ef4a7d30d875da9790ea5d
......
[check-etcd] Checking that the etcd cluster is healthy
error execution phase check-etcd: etcd cluster is not healthy: failed to dial endpoint https://192.168.1.102:2379 with maintenance client: context deadline exceeded
To see the stack trace of this error execute with --v=5 or higher
As shown above, the join failed on master2: the etcd health check did not pass. Leave it for now; we will come back to this problem.
Next, join master3 to the cluster:
[root@master3 ~]# kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2 \
--control-plane --certificate-key 3855cc2e945048b10ac9b1b96427c84686fcfd3692ef4a7d30d875da9790ea5d
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
As the output shows, master3 has joined the cluster. Follow the prompt and run:
[root@master3 ~]# mkdir -p $HOME/.kube
[root@master3 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master3 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
Back on master1, the cluster now has two nodes:
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady master 14m v1.16.6
master3 NotReady master 5m30s v1.16.6
Now, back to the failed etcd health check on master2. Following a fix commonly suggested online:
1. On master1, list the etcd pods:
[root@master1 ~]# kubectl get pod -n kube-system |grep etcd
etcd-master1 1/1 Running 0 94m
etcd-master3 1/1 Running 0 85m
The etcd pods are named etcd-<hostname>.
Taking etcd-master1 as an example, exec into the etcd container:
kubectl exec -ti etcd-master1 -n kube-system sh
# etcd in Kubernetes uses the v3 API, so set this variable first
export ETCDCTL_API=3
# list the current etcd members
etcdctl --cacert="/etc/kubernetes/pki/etcd/ca.crt" --cert="/etc/kubernetes/pki/etcd/server.crt" --key="/etc/kubernetes/pki/etcd/server.key" member list
The returned information looks like this:
# etcdctl --cacert="/etc/kubernetes/pki/etcd/ca.crt" --cert="/etc/kubernetes/pki/etcd/server.crt" --key="/etc/kubernetes/pki/etcd/server.key" member list
540fa0a1b3b843f3, started, master1, https://192.168.1.101:2380, https://192.168.1.101:2379
808bdf5f71c845e7, started, master2, https://192.168.1.102:2380, https://192.168.1.102:2379
de416ae979088d07, started, master3, https://192.168.1.103:2380, https://192.168.1.103:2379
The third column of the output is the hostname. The node that failed to join, master2, is already in the member list. In other words, the join failed because stale etcd member information was never reset; manually removing master2 from the member list allows it to join successfully:
etcdctl --cacert="/etc/kubernetes/pki/etcd/ca.crt" --cert="/etc/kubernetes/pki/etcd/server.crt" --key="/etc/kubernetes/pki/etcd/server.key" member remove 808bdf5f71c845e7
After the removal, type exit to leave the pod.
Back on master2, clean up the files left over from the failed join attempt:
[root@master2 ~]# kubeadm reset
Then join the cluster again:
[root@master2 ~]# kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2 \
--control-plane --certificate-key 3855cc2e945048b10ac9b1b96427c84686fcfd3692ef4a7d30d875da9790ea5d
This time it should succeed. As before, follow the prompt and run:
[root@master2 ~]# mkdir -p $HOME/.kube
[root@master2 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master2 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
The cluster now has three nodes:
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 NotReady master 104m v1.16.6
master2 NotReady master 70m v1.16.6
master3 NotReady master 95m v1.16.6
5. Install the worker node
On node1, repeat the system initialization and install kubelet, kubectl, kubeadm and docker-ce in the same way. Once that is done, proceed as follows.
Now add node1 as a worker node; note that the worker join command differs from the control-plane one:
[root@node1 ~]# kubeadm join 192.168.1.100:6444 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:7edb3c9b488bdc783c8e7635110258d34b2ce0069ff6ee0333e4df3fbf460ba2
......
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
All nodes have joined the cluster, but they are all still NotReady because no network add-on has been installed. Next we install the flannel network add-on from a YAML file.
6. Install the flannel network add-on
Open https://github.com/coreos/flannel/blob/master/Documentation/kube-flannel.yml, copy its contents into a file named flannel.yml and save it on the server. The file shows that the flannel image used is quay.io/coreos/flannel:v0.12.0-amd64, so pull that image on every node first:
[root@master1 ~]# docker pull quay.io/coreos/flannel:v0.12.0-amd64
Then install the flannel network:
[root@master1 ~]# kubectl apply -f flannel.yml
podsecuritypolicy.policy/psp.flannel.unprivileged created
clusterrole.rbac.authorization.k8s.io/flannel created
clusterrolebinding.rbac.authorization.k8s.io/flannel created
serviceaccount/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds-amd64 created
daemonset.apps/kube-flannel-ds-arm64 created
daemonset.apps/kube-flannel-ds-arm created
daemonset.apps/kube-flannel-ds-ppc64le created
daemonset.apps/kube-flannel-ds-s390x created
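It can take a minute or two for the flannel DaemonSet pods to reach Running on every node; this can be checked with, for example:
kubectl get pods -n kube-system -o wide | grep flannel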
Check the nodes again; they are all Ready now:
[root@master1 ~]# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master1 Ready master 148m v1.16.6 192.168.1.101 <none> CentOS Linux 7 (Core) 5.9.1-1.el7.elrepo.x86_64 docker://18.6.3
master2 Ready master 114m v1.16.6 192.168.1.102 <none> CentOS Linux 7 (Core) 5.9.1-1.el7.elrepo.x86_64 docker://18.6.3
master3 Ready master 139m v1.16.6 192.168.1.103 <none> CentOS Linux 7 (Core) 5.9.1-1.el7.elrepo.x86_64 docker://18.6.3
node1 Ready <none> 19m v1.16.6 192.168.1.104 <none> CentOS Linux 7 (Core) 5.9.1-1.el7.elrepo.x86_64 docker://18.6.3
7. Basic kubectl operations
1. Check the cluster status
kubectl version --short=true    show client and server version information
[root@master1 ~]# kubectl version --short=true
Client Version: v1.16.6
Server Version: v1.16.0
kubectl cluster-info    show cluster information
[root@master1 ~]# kubectl cluster-info
Kubernetes master is running at https://192.168.1.100:6444
KubeDNS is running at https://192.168.1.100:6444/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy
To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'.
2. Create resource objects
kubectl run name --image=(image) --replicas=(replica count) --port=(container port to expose) --labels=(custom labels)
kubectl create -f **.yaml    imperative object configuration
kubectl apply -f **.yaml    declarative object configuration (also used for updates, etc.)
3. View resource objects
#kubectl get namespace    list namespaces
[root@master1 ~]# kubectl get namespace
NAME STATUS AGE
default Active 162m
kube-node-lease Active 162m
kube-public Active 162m
kube-system Active 162m
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 Ready master 3h12m v1.16.6
master2 Ready master 158m v1.16.6
master3 Ready master 3h3m v1.16.6
node1 Ready <none> 64m v1.16.6
#kubectl get pods,services -o wide    (-o sets the output format; wide is the plain-text format with additional columns)
[root@master1 ~]# kubectl get pods,services -o wide
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 162m <none>
#kubectl get pod -o wide -n kube-system    list pods in the kube-system namespace
[root@master1 ~]# kubectl get pod -o wide -n kube-system
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-67c766df46-sg58w 1/1 Running 0 163m 10.244.0.2 master1 <none> <none>
coredns-67c766df46-vtxps 1/1 Running 0 163m 10.244.0.3 master1 <none> <none>
etcd-master1 1/1 Running 0 163m 192.168.1.101 master1 <none> <none>
#kubectl get pod -w    (-w watches for resource changes)
4. Print container logs
kubectl logs name -f -c container_name -n kube-system    (-f follows the log; -c can be omitted if the pod has only one container)
5. Execute commands in a container
kubectl exec name -c container_name -n kube-system -- <command>
kubectl exec -it pod_name /bin/sh    open an interactive shell in the container
6. Delete resource objects
kubectl delete [pods/services/deployments/...] name    delete the named resource object
kubectl delete [pods/services/deployments/...] -l key=value -n kube-system    delete objects in kube-system matching the given label
kubectl delete [pods/services/deployments/...] --all -n kube-system    delete all objects of that type in kube-system
kubectl delete [pods/services/deployments/...] resource_name --force --grace-period=0 -n kube-system    force-delete objects stuck in Terminating
kubectl delete -f xx.yaml
kubectl apply -f xx.yaml --prune -l <labels>    (deletion by pruning is rarely used)
kubectl delete rs rs_name --cascade=false    (by default, deleting a controller also deletes all Pods it manages; --cascade=false deletes only the ReplicaSet itself)
7. Update resource objects
kubectl replace -f xx.yaml --force    (--force is required when replacing based on the previous configuration file)
8. Expose a service (create a Service)
kubectl expose deployments/deployment_name --type="NodePort" --port=(container port to expose) --name=(Service object name)
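As a concrete sketch (the names nginx-demo, nginx-svc and the nginx image are made up for illustration):
kubectl create deployment nginx-demo --image=nginx
kubectl expose deployment nginx-demo --type=NodePort --port=80 --name=nginx-svc
kubectl get service nginx-svc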
9. Scale out and scale in
kubectl scale deployment/deployment_name --replicas=N
kubectl scale deployment/deployment_name --replicas=N --current-replicas=M    scale only if the current replica count equals M
10. List API versions
kubectl api-versions
11. Start a local proxy gateway for the API server on this host
kubectl proxy --port=8080
You can then send requests to this proxy socket with curl:
curl localhost:8080/api/v1/namespaces/ | jq .items[].metadata.name    (jq filters the JSON output)
12. When writing a resource manifest and unsure how to define a field, view the documentation for that resource type's fields
kubectl explain pods/deployments/...    (nested objects can be inspected with forms like pods.spec)
13. View the configuration of a resource object
kubectl get resource_type resource_name -o yaml --export    (--export omits system-generated fields); append > file.yaml to quickly generate a manifest
14. Label management
kubectl label pods/pod_name key=value    add a label; append --overwrite to modify an existing label
kubectl label nodes node_name key=value    label a worker node; a pod can then use nodeSelector to be scheduled onto specific nodes
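For example (the label key disktype and value ssd are invented for illustration):
kubectl label nodes node1 disktype=ssd
kubectl get nodes -l disktype=ssd
# a pod whose spec sets nodeSelector with disktype: ssd will then only be scheduled onto node1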
15. Annotation management
kubectl annotate pods pod_name key=value
16. Patch a Deployment controller / manage rolling updates
kubectl patch deployment deployment-demo -p '{"spec": {"minReadySeconds": 5}}'    (-p applies a patch, JSON by default)
kubectl set image deployments deployment-demo myapp=ikubernetes/myapp:v2    change the image used by the deployment
kubectl rollout status deployment deployment-demo    print status information during a rolling update
kubectl get deployments deployment-demo --watch    watch the deployment as it updates
kubectl rollout pause deployments deployment-demo    pause the update
kubectl rollout resume deployments deployment-demo    resume the update
kubectl rollout history deployments deployment-demo    view revision history (recording specific revisions requires --record at apply time)
kubectl rollout undo deployments deployment-demo --to-revision=2    roll back to the given revision; without --to-revision it rolls back to the previous revision
8. Node management
The ROLES column in the earlier output shows each node's role, such as master or worker. Below we change a node's role with kubectl commands.
Delete a node:
kubectl delete node node1
Label node1 with the master role:
[root@master1 ~]# kubectl label node node1 node-role.kubernetes.io/master=worker
node/node1 labeled
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 Ready master 173m v1.16.6
master2 Ready master 138m v1.16.6
master3 Ready master 164m v1.16.6
node1 Ready master 44m v1.16.6
Look at node1's description: it now shows as a master, but Taints is <none> (no taints) and Unschedulable is false, so it remains schedulable and pods can still run on this node:
[root@master1 ~]# kubectl describe node node1
Name: node1
Roles: master
Labels: beta.kubernetes.io/arch=amd64
beta.kubernetes.io/os=linux
kubernetes.io/arch=amd64
kubernetes.io/hostname=node1
kubernetes.io/os=linux
node-role.kubernetes.io/master=
Annotations: flannel.alpha.coreos.com/backend-data: {"VtepMAC":"e2:fe:85:a4:7c:04"}
flannel.alpha.coreos.com/backend-type: vxlan
flannel.alpha.coreos.com/kube-subnet-manager: true
flannel.alpha.coreos.com/public-ip: 192.168.1.104
kubeadm.alpha.kubernetes.io/cri-socket: /var/run/dockershim.sock
node.alpha.kubernetes.io/ttl: 0
volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp: Sun, 01 Nov 2020 16:28:45 +0800
Taints: <none>
Unschedulable: false
Remove the master role from node1:
[root@master1 ~]# kubectl label node node1 node-role.kubernetes.io/master-
node/node1 labeled
[root@master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master1 Ready master 177m v1.16.6
master2 Ready master 143m v1.16.6
master3 Ready master 168m v1.16.6
node1 Ready <none> 48m v1.16.6
[root@master1 ~]# kubectl describe node node1
Name: node1
Roles: <none>
Labels: beta.kubernetes.io/arch=amd64
beta.kubernetes.io/os=linux
kubernetes.io/arch=amd64
kubernetes.io/hostname=node1
kubernetes.io/os=linux
Annotations: flannel.alpha.coreos.com/backend-data: {"VtepMAC":"e2:fe:85:a4:7c:04"}
flannel.alpha.coreos.com/backend-type: vxlan
flannel.alpha.coreos.com/kube-subnet-manager: true
flannel.alpha.coreos.com/public-ip: 192.168.1.104
kubeadm.alpha.kubernetes.io/cri-socket: /var/run/dockershim.sock
node.alpha.kubernetes.io/ttl: 0
volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp: Sun, 01 Nov 2020 16:28:45 +0800
Taints: <none>
Unschedulable: false
9. Adding and removing taints
Syntax:
kubectl taint node [node] key=value:[effect], where [effect] can be one of [ NoSchedule | PreferNoSchedule | NoExecute ]
NoSchedule: pods will not be scheduled onto the node
PreferNoSchedule: the scheduler tries to avoid the node
NoExecute: new pods are not scheduled and existing pods on the node are evicted
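As an illustration of the syntax (the key dedicated and value db are made up), a custom taint could be added, checked and removed like this:
kubectl taint node node1 dedicated=db:NoSchedule
kubectl describe node node1 | grep Taints
kubectl taint node node1 dedicated-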
When we initialized the cluster, master1, master2 and master3 were all given a taint, so by default they do not take workloads. You can see that Taints is not none but node-role.kubernetes.io/master:NoSchedule, i.e. the node is tainted with NoSchedule and business pods are not allowed to run on it; master2 and master3 look the same:
[root@master1 ~]# kubectl describe node master1
Name: master1
Roles: master
Labels: beta.kubernetes.io/arch=amd64
beta.kubernetes.io/os=linux
kubernetes.io/arch=amd64
kubernetes.io/hostname=master1
kubernetes.io/os=linux
node-role.kubernetes.io/master=
Annotations: flannel.alpha.coreos.com/backend-data: {"VtepMAC":"0a:9e:ee:99:37:8c"}
flannel.alpha.coreos.com/backend-type: vxlan
flannel.alpha.coreos.com/kube-subnet-manager: true
flannel.alpha.coreos.com/public-ip: 192.168.1.101
kubeadm.alpha.kubernetes.io/cri-socket: /var/run/dockershim.sock
node.alpha.kubernetes.io/ttl: 0
volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp: Sun, 01 Nov 2020 14:19:57 +0800
Taints: node-role.kubernetes.io/master:NoSchedule
Unschedulable: false
If you want this node to take workloads as well, run the command below (it removes the node-role.kubernetes.io/master:NoSchedule taint), so that the master can also work as a regular node:
kubectl taint node master1 node-role.kubernetes.io/master:NoSchedule-
Re-add the taint:
kubectl taint node master1 node-role.kubernetes.io/master:NoSchedule
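To confirm the taint is back in place, for example:
kubectl describe node master1 | grep Taints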