Deploying CRI-O (1.24.0) + Kubernetes (1.26.0) on CentOS 7 with kubeadm: 3 masters, 3 nodes
Environment:
Platform: OpenStack
Network: outbound internet access is available (via proxy), but the connection is slow
OS: CentOS 7
Hosts:
192.168.20.127 k8s-master-1 8C16G
192.168.20.32 k8s-master-2 8C16G
192.168.20.121 k8s-master-3 8C16G
192.168.20.18 k8s-node-1 16C32G
192.168.20.93 k8s-node-2 16C32G
192.168.20.78 k8s-node-3 16C32G
Deployment:
1. Basic system configuration
#Run the following on all machines
0. Static IP addresses are preferred
1. Configure /etc/hosts
2. Configure passwordless SSH login
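For steps 1 and 2, a minimal sketch (hostnames and IPs are from the host table above; run as the user you deploy with and adjust to your environment):
cat >> /etc/hosts << EOF
192.168.20.127 k8s-master-1
192.168.20.32 k8s-master-2
192.168.20.121 k8s-master-3
192.168.20.18 k8s-node-1
192.168.20.93 k8s-node-2
192.168.20.78 k8s-node-3
EOF
#generate a key pair once, then copy the public key to every host for passwordless login
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
for h in k8s-master-1 k8s-master-2 k8s-master-3 k8s-node-1 k8s-node-2 k8s-node-3; do ssh-copy-id $h; done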
3. Disable SELinux
sed -i 's/enforcing/disabled/' /etc/selinux/config
setenforce 0
getenforce
4. Kernel settings required by Kubernetes
#Load the required kernel modules:
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the sysctl parameters without rebooting
sudo sysctl --system
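A quick check that the modules are loaded and the sysctl values took effect:
#verify the kernel modules
lsmod | grep -e overlay -e br_netfilter
#verify the sysctl parameters
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward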
2. Deploy keepalived + haproxy to provide the VIP
#Run the following on all machines
yum install -y keepalived haproxy
cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.bak
cp /etc/keepalived/keepalived.conf /etc/keepalived/keepalived.conf.bak
#haproxy configuration
cat > /etc/haproxy/haproxy.cfg << EOF
global
    log 127.0.0.1 local2
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4000
    user haproxy
    group haproxy
    daemon
    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats
defaults
    mode http
    log global
    option httplog
    option dontlognull
    option http-server-close
    option forwardfor except 127.0.0.0/8
    option redispatch
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 10s
    timeout client 1m
    timeout server 1m
    timeout http-keep-alive 10s
    timeout check 10s
    maxconn 3000
frontend k8s-master
    bind 0.0.0.0:16443
    bind 127.0.0.1:16443
    mode tcp
    option tcplog
    tcp-request inspect-delay 5s
    default_backend k8s-master
backend k8s-master
    mode tcp
    option tcplog
    option tcp-check
    balance roundrobin
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server k8s-master-1 192.168.20.127:6443 check
    server k8s-master-2 192.168.20.32:6443 check
    server k8s-master-3 192.168.20.121:6443 check
EOF
#keepalived configuration (on the other two masters, set state to BACKUP and use a lower priority so the VIP has a deterministic owner)
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
    router_id k8s
}
vrrp_script check_haproxy {
    script "killall -0 haproxy"
    interval 3
    weight -2
    fall 10
    rise 2
}
vrrp_instance VI_1 {
    state MASTER
    interface eth0
    virtual_router_id 51
    priority 250
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass k8s
    }
    virtual_ipaddress {
        192.168.20.150/24
    }
    track_script {
        check_haproxy
    }
}
EOF
systemctl enable haproxy;systemctl start haproxy
systemctl enable keepalived;systemctl start keepalived
ip a | grep eth0
#Verify that the VIP is up and fails over correctly between nodes
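A simple failover test, assuming the VIP currently sits on k8s-master-1:
#on the node holding the VIP, confirm it is present, then stop haproxy so the check_haproxy script fails
ip a | grep 192.168.20.150
systemctl stop haproxy
#on another master, confirm the VIP has moved over, then restore haproxy on the first node
ip a | grep 192.168.20.150
systemctl start haproxy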
3. Install CRI-O + (kubeadm, kubectl, kubelet)
Install a specific CRI-O version
Reference: https://cri-o.io/
OS selects the operating system
VERSION selects the CRI-O version
OS=CentOS_7
VERSION=1.24
export OS VERSION
curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable.repo https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/$OS/devel:kubic:libcontainers:stable.repo
curl -L -o /etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:$VERSION.repo https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable:cri-o:$VERSION/$OS/devel:kubic:libcontainers:stable:cri-o:$VERSION.repo
yum install cri-o -y
systemctl enable crio
systemctl start crio
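Before moving on, it is worth confirming CRI-O actually came up:
#the service should be active and the reported version should match the repo you enabled (1.24 here)
systemctl status crio --no-pager
crio --version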
Install specific versions of kubeadm, kubectl and kubelet
#Reference: the Aliyun open source mirror site
#The example on the mirror page may show an older release (it showed 1.16 at the time); to install a specific version, pass the exact package version to yum install, as below
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
setenforce 0
#yum install -y kubelet kubeadm kubectl
yum install -y kubelet-1.26.0-0.x86_64 kubeadm-1.26.0-0.x86_64 kubectl-1.26.0-0.x86_64
systemctl enable kubelet && systemctl start kubelet
4. Configure CRI-O + kubelet
#Run on all nodes
#Change the default CNI subnet; keep it consistent with the podSubnet in the kubeadm init config
sed -i 's/10.85.0.0/10.244.0.0/g' /etc/cni/net.d/100-crio-bridge.conf
#kubelet changes:
vim /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf and append the following to the ExecStart line:
--container-runtime=remote --cgroup-driver=systemd --container-runtime-endpoint=unix:///var/run/crio/crio.sock --runtime-request-timeout=5m
systemctl daemon-reload;systemctl restart kubelet
[root@k8s-master-2 ~]# cat /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf | grep -v "^#"
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS --container-runtime=remote --cgroup-driver=systemd --container-runtime-endpoint=unix:///var/run/crio/crio.sock --runtime-request-timeout=5m
#CRI-O configuration
[root@k8s-master-2 ~]# cat /etc/crictl.yaml
runtime-endpoint: unix:///var/run/crio/crio.sock
image-endpoint: unix:///var/run/crio/crio.sock
timeout: 10
debug: false
pull-image-on-create: true
disable-pull-on-run: false
#Deleting the original file and writing a new one from scratch causes errors; back up the original, edit it in place, and then restart crio
[root@k8s-master-2 ~]# grep -Env '#|^$|^\[' /etc/crio/crio.conf
136:selinux = true
169:cgroup_manager = "systemd"
455:pause_image = "pww.artifactory.cdi.philips.com/tools/k8s1.26/pause:3.9"
478:insecure_registries = [
479: "docker.io",
480: "pww.artifactory.cdi.philips.com",
481: "registry.access.redhat.com",
482: "quay.io",
483: "registry.aliyuncs.com"
484: ]
505:plugin_dirs = [
506: "/opt/cni/bin",
507: "/usr/libexec/cni",
508:]
514:enable_metrics = true
548:metrics_port = 9537
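A possible sequence to apply these changes, following the backup-and-edit note above (line numbers and defaults vary between crio builds, so verify each key rather than trusting the numbers):
#back up, edit the keys shown above (selinux, cgroup_manager, pause_image, insecure_registries, plugin_dirs, metrics), then restart
cp /etc/crio/crio.conf /etc/crio/crio.conf.bak
vim /etc/crio/crio.conf
systemctl restart crio
systemctl status crio --no-pager
#with /etc/crictl.yaml pointing at crio.sock, crictl should now talk to the runtime
crictl info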
5. Generate the init config file and prepare for init
#Run on master01
kubeadm config print init-defaults > kubeadm-config.yaml
[root@k8s-master-1 ~]# cat kubeadm/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.20.127 #this machine's IP
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/crio/crio.sock #CRI-O socket path
  imagePullPolicy: IfNotPresent
  #name: node
  taints: null
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "192.168.20.150:16443" #VIP and port
controllerManager: {}
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: pww.artifactory.cdi.com/tools/k8s1.26 #I pushed the images to an internal registry beforehand; they can then be pre-pulled onto each machine with crictl pull
kind: ClusterConfiguration
kubernetesVersion: 1.26.0
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16 #pod subnet; keep it consistent with /etc/cni/net.d/100-crio-bridge.conf
  serviceSubnet: 10.1.0.0/16 #service subnet
scheduler: {}
#---
#apiVersion: kubeproxy.config.k8s.io/v1alpha1
#kind: KubeProxyConfiguration
#featureGates:
# SupportIPVSProxyMode: true
#mode: ipvs
Run the following command to list the required images:
kubeadm config images list --config kubeadm-config.yaml
Pull the control-plane images:
kubeadm config images pull --config kubeadm-config.yaml
#This pull command does not work well here; my suggestion is to list the images first,
#pull them with another container tool (buildah, docker pull) and push them to a private registry,
#then use crictl pull on each machine to download them from the private registry before running init
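A sketch of that workflow; my-registry.example.com is a placeholder for your private registry, and the image name is just one of the entries printed by the list command:
#on a machine with internet access: pull, retag and push each listed image
docker pull registry.k8s.io/kube-apiserver:v1.26.0
docker tag registry.k8s.io/kube-apiserver:v1.26.0 my-registry.example.com/tools/k8s1.26/kube-apiserver:v1.26.0
docker push my-registry.example.com/tools/k8s1.26/kube-apiserver:v1.26.0
#on each cluster machine: pre-pull from the private registry so kubeadm init finds the images locally
crictl pull my-registry.example.com/tools/k8s1.26/kube-apiserver:v1.26.0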
6.kubeadm init
kubeadm reset -f #if init fails, troubleshoot the cause and run this before retrying
kubeadm init --config=kubeadm-config.yaml | tee kubeadm-init.log
#On success, output like the following is shown
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.168.20.150:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:6e6990afc6b65416d7b1a2b00c154798dc38c1c611aa02b93754f35870dafae4 \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.168.20.150:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:6e6990afc6b65416d7b1a2b00c154798dc38c1c611aa02b93754f35870dafae4
7. Join the control-plane nodes and the worker nodes
Joining a master node additionally requires copying the certificate files from master01
Worker nodes can run the join command directly
scp -r /etc/kubernetes/pki centos@k8s-master-2:/home/centos/
ssh centos@k8s-master-2 "sudo cp -ra /home/centos/pki /etc/kubernetes/"
Run on master2:
cd /etc/kubernetes/pki;ls | grep api | xargs rm -f
cd /etc/kubernetes/pki/etcd;ls | grep -v ca. | xargs rm -f
#Then run the control-plane join command printed by the successful init above
8. Deploy the network plugin
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
#If the flannel pods fail to start and the logs show that the host's flannel and cni0 interfaces are on mismatched subnets, delete the cni0 interface so it gets recreated
sudo ifconfig cni0 down
sudo ip link delete cni0
9. Verify the deployment
kubectl get cs
kubectl get pod -A
#Deploy a pod and a service to verify access
[root@k8s-master-1 ~]# cat centos.yaml
kind: Deployment
apiVersion: apps/v1
metadata:
  name: centos
spec:
  replicas: 3
  selector:
    matchLabels:
      app: httpd
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: httpd
    spec:
      containers:
      - resources: {}
        terminationMessagePath: /dev/termination-log
        name: httpd
#        command:
#        - /bin/sh
        ports:
        - containerPort: 8080
          protocol: TCP
        imagePullPolicy: IfNotPresent
        terminationMessagePolicy: File
        image: >-
          pww.artifactory.cdi.philips.com/tools/nginx:latest
#        args:
#        - '-c'
#        - while true;do echo hello;sleep 100;done
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      dnsPolicy: ClusterFirst
      securityContext: {}
      schedulerName: default-scheduler
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 25%
      maxSurge: 25%
  revisionHistoryLimit: 10
  progressDeadlineSeconds: 600
---
apiVersion: v1
kind: Service
metadata:
  name: centos
spec:
  selector:
    app: httpd
  type: NodePort
  ports:
  - name: http
    port: 80
    targetPort: 80
#    nodePort: 30036
    protocol: TCP
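To apply and exercise it (the node IP is one of the hosts above; the NodePort is whatever the service gets assigned):
kubectl apply -f centos.yaml
kubectl get pod -l app=httpd -o wide
#look up the assigned NodePort, then hit it on each node's IP
kubectl get svc centos
curl http://192.168.20.18:<nodePort>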
We tried the NodePort on every host and could not get through;
nc -vz <ip> <port> timed out. After ruling out security groups and other policies,
ip add on master01 showed:
[root@k8s-master-1 ~]# ip add
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1442 qdisc pfifo_fast state UP group default qlen 1000
link/ether fa:16:3e:59:a4:17 brd ff:ff:ff:ff:ff:ff
inet 192.168.20.127/24 brd 192.168.20.255 scope global dynamic eth0
valid_lft 37859sec preferred_lft 37859sec
inet 192.168.20.150/24 scope global secondary eth0
valid_lft forever preferred_lft forever
inet6 fe80::f816:3eff:fe59:a417/64 scope link
valid_lft forever preferred_lft forever
3: flannel.1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1392 qdisc noqueue state UNKNOWN group default
link/ether ee:9b:21:8e:1f:0b brd ff:ff:ff:ff:ff:ff
inet 10.244.0.0/32 scope global flannel.1
valid_lft forever preferred_lft forever
inet6 fe80::ec9b:21ff:fe8e:1f0b/64 scope link
valid_lft forever preferred_lft forever
4: cni0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1392 qdisc noqueue state UP group default qlen 1000
link/ether 9e:78:14:ec:5c:79 brd ff:ff:ff:ff:ff:ff
inet 10.244.0.1/24 brd 10.244.0.255 scope global cni0
valid_lft forever preferred_lft forever
inet6 fe80::9c78:14ff:feec:5c79/64 scope link
valid_lft forever preferred_lft forever
5: veth2fd0488d@if2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1392 qdisc noqueue master cni0 state UP group default
link/ether e6:f5:1d:c1:08:9f brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet6 fe80::e4f5:1dff:fec1:89f/64 scope link
valid_lft forever preferred_lft forever
6: vethb48239f1@if2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1392 qdisc noqueue master cni0 state UP group default
link/ether 3e:89:6a:bb:c7:65 brd ff:ff:ff:ff:ff:ff link-netnsid 1
inet6 fe80::3c89:6aff:febb:c765/64 scope link
valid_lft forever preferred_lft forever
ip add on master02 showed:
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1442 qdisc pfifo_fast state UP group default qlen 1000
link/ether fa:16:3e:28:ca:4d brd ff:ff:ff:ff:ff:ff
inet 192.168.20.32/24 brd 192.168.20.255 scope global dynamic eth0
valid_lft 42890sec preferred_lft 42890sec
inet6 fe80::f816:3eff:fe28:ca4d/64 scope link
valid_lft forever preferred_lft forever
3: flannel.1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1392 qdisc noqueue state UNKNOWN group default
link/ether ba:06:d0:c4:f9:b0 brd ff:ff:ff:ff:ff:ff
inet 10.244.1.0/32 scope global flannel.1
valid_lft forever preferred_lft forever
inet6 fe80::b806:d0ff:fec4:f9b0/64 scope link
valid_lft forever preferred_lft forever
The problem now: the cni0 interface is missing on master02. How to fix it?
I could think of two recovery approaches. First, kick the node out of the cluster and rejoin it. Second, manually recreate the cni0 bridge and reattach the pod network to it. The first is quick and crude; to avoid changes that might affect the master components, I leaned toward trying the second.
Option 1:
#Run on the node:
kubeadm reset
rm -rf /etc/kubernetes/admin.conf
rm -rf $HOME/.kube/config
#Run on a master:
kubectl delete node xxx
#Delete the cni and flannel interfaces on the node (run on the node)
kubeadm reset
ifconfig cni0 down
ifconfig flannel.1 down
ip link del flannel.1
ip link del cni0
# Some of these commands may report errors; if an interface does not exist, ignore the error
# The interfaces will be recreated after the node rejoins
Run on a master
Prepare to rejoin the node
# Regenerate a join token with kubeadm
[root@master ~]# kubeadm token create --print-join-command
kubeadm join 192.168.247.136:6443 --token x5phh9.9lpb629032p7dseb --discovery-token-ca-cert-hash sha256:bd23534d635b46f5316f0d388bd88853a6ddb47b1c04129bf25ea31cdbbfba4a
Rejoin the node
Run the join command on the node
###Not verified
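If you do go this route, a quick sanity check after the rejoin (a sketch; substitute the rejoined node's name):
#on a master: wait for the node to report Ready again
kubectl get nodes -w
#the flannel pod for that node should come back Running (namespace is kube-system or kube-flannel depending on the manifest version)
kubectl get pod -A -o wide | grep flannel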
Option 2:
The rough flow of pod traffic in and out of the node is:
the pod generates traffic and, per its own routing table, sends it to cni0;
cni0, per the node's routing table, forwards the traffic to the tunnel device flannel.1;
flannel.1 looks at the packet's destination IP, obtains the necessary details about the remote tunnel device from flanneld, and encapsulates the packet;
flannel.1 sends the packet to the remote device. The remote node's NIC receives it, recognizes it as an overlay packet, strips the outer encapsulation, and hands the inner packet to its flannel.1 device;
the packet reaches the destination node's flannel.1, which inspects it and, matching the routing table, passes it on to cni0;
cni0 matches its forwarding table and delivers the packet to the corresponding port on the bridge.
From this flow we can see that a pod's network must be attached to the cni0 bridge, while cni0 and flannel.1 are not directly connected; forwarding between them happens via the node's routing table. So it should be enough to find every pod's veth pair on the node and attach the host-side end to a newly created cni0 bridge. That turns into two questions:
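Before changing anything, the relevant state can be inspected on the affected node (a sketch; 10.244 matches the pod subnet used above):
#routes for the pod subnet: local pods should point at cni0, remote nodes at flannel.1
ip route | grep 10.244
#flannel.1 details, including the MTU that the recreated cni0 bridge should match
ip -d link show flannel.1
#host-side veth interfaces and whether they are attached to a bridge (look for "master cni0")
ip addr | grep veth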
First: how do we create the cni0 bridge with the correct parameters?
Since flannel runs in VXLAN mode, pay attention to the MTU when creating cni0. The example below comes from a node using the 172.28.0.0/25 subnet with MTU 1450; on the cluster above the corresponding values would be the node's 10.244.x.0/24 subnet and MTU 1392. Create the cni0 bridge as follows:
// Create the cni0 device as a bridge
# ip link add cni0 type bridge
# ip link set dev cni0 up
// Assign an IP address to cni0; this is the pods' gateway address and must be in the subnet that corresponds to flannel.1
# ifconfig cni0 172.28.0.1/25
// Set the cni0 MTU to 1450
# ifconfig cni0 mtu 1450 up
// Check the result
# ifconfig cni0
cni0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1450
inet 172.28.0.1 netmask 255.255.255.128 broadcast 172.28.0.127
ether 0e:5e:b9:62:0d:60 txqueuelen 1000 (Ethernet)
RX packets 487334 bytes 149990594 (149.9 MB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 629306 bytes 925100055 (925.1 MB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
// The routing table now also contains a cni0 route for this node's pod subnet
# route -n | grep cni0
172.28.0.0 0.0.0.0 255.255.255.128 U 0 0 0 cni0
Second: how do we accurately find each pod's veth pair, and is it related to network namespaces?
On a node running only two pods, inspecting the virtual interfaces shows two veth-prefixed interfaces on the host whose peer ends sit inside the pods, and the pods' netns can also be seen with the show command. Notably, on a healthy node the host-side veth interfaces (vethf0978d0c and vethb9525687 in that example) have their master set to cni0. So it is enough to find every veth-prefixed interface without a master and attach it to cni0.
// A simple loop that attaches all unattached veth interfaces to the cni0 bridge in one go
for veth in $(ip addr | grep veth | grep -v master | awk -F'[@|:]' '{print $2}' | sed 's/ //g')
do
ip link set dev $veth master cni0
done
After these two steps, the previously unreachable pods could be pinged again and communicated normally with pods on other nodes. Creating and deleting pods also worked fine.
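A quick way to verify the recovery (a sketch; substitute a real pod IP and node name):
#host-side veths should show "master cni0" again
ip addr | grep veth
#list pod IPs on the recovered node and ping one of them from the host
kubectl get pod -A -o wide | grep <node-name>
ping -c 3 <pod-ip>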
With that, the manual recovery of cni0 is done. If an ip link del cni0 ever happens again, this method restores pod networking the fastest way and with the least impact, without draining the node. It also highlights an operational issue: for production, avoid logging into master nodes to operate on them directly; instead, grant Kubernetes permissions on a separate management host and run operations from there. Master nodes should also be deployed separately from business workloads as much as possible, to keep them stable.