Creating a Highly Available Kubernetes Cluster on AWS with kubeadm
kubeadm is the tool published by the Kubernetes community for quickly deploying a Kubernetes cluster.
With it, a cluster can be bootstrapped with two commands:
# Create a master node
$ kubeadm init
# Join a node to the cluster
$ kubeadm join <master node IP and port>
1. Installation requirements
Before starting, the machines that will form the Kubernetes cluster must meet the following requirements (a quick verification sketch follows the list):
- One or more machines running CentOS 7.x x86_64
- Hardware: 2 GB of RAM or more, 2 or more CPUs, 8 GB of disk or more
- Internet access for pulling images; if the servers cannot reach the internet, download the images in advance and import them on each node
- Swap disabled
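A minimal sketch for checking these requirements on each machine before proceeding:
nproc                      # expect 2 or more CPUs
free -h                    # expect 2 GB of RAM or more; the Swap line should read 0 once swap is disabled
df -h /                    # expect 8 GB of disk or more
cat /etc/redhat-release    # confirm CentOS 7.x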
2. Environment
Role | IP | Hostname
master1 | 10.3.3.41 | master1
master2 | 10.3.3.48 | master2
master3 | 10.3.3.115 | master3
node1 | 10.3.3.181 | node1
VIP (virtual IP) | 10.3.3.88 | -
System tuning
# Disable the firewall
sudo systemctl disable firewalld --now
# Disable SELinux
sudo sed -i 's/enforcing/disabled/' /etc/selinux/config # permanent
sudo setenforce 0 # temporary
# Disable swap
sudo swapoff -a # temporary
sudo sed -ri 's/.*swap.*/#&/' /etc/fstab # permanent
# Set the hostname according to the plan above
sudo hostnamectl set-hostname <hostname>
# Add hosts entries on the master nodes
sudo tee -a /etc/hosts << EOF
10.3.3.88 master.k8s.io k8s-vip
10.3.3.41 master01.k8s.io master1
10.3.3.48 master02.k8s.io master2
10.3.3.115 master03.k8s.io master3
10.3.3.181 node01.k8s.io node1
EOF
Alternatively, edit with sudo vim /etc/hosts
# On all machines, make bridged IPv4 traffic visible to the iptables chains
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
Alternatively, edit with sudo vim /etc/sysctl.d/k8s.conf
sudo sysctl --system # apply
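The bridge settings above only exist once the br_netfilter kernel module is loaded; loading it explicitly (and on every boot) avoids "No such file or directory" errors from sysctl. This step is not in the original sequence but matches the standard kubeadm prerequisites:
sudo modprobe br_netfilter
echo "br_netfilter" | sudo tee /etc/modules-load.d/br_netfilter.conf
sudo sysctl --system   # re-apply once the module is loaded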
# Time synchronization
sudo yum install ntpdate -y
sudo ntpdate time.windows.com
# Change the time zone
tzselect
# menu selections that resolve to Asia/Shanghai in this environment (the exact numbers depend on the tzselect version)
#4
#9
#1
#1
echo "export TZ='Asia/Shanghai'" >> ~/.bashrc
source ~/.bashrc
sudo rm -f /etc/localtime
sudo ln -s /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
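Both changes can be verified immediately; date should now print CST, and timedatectl should show Asia/Shanghai:
date
timedatectl | grep "Time zone"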
# Kubernetes limits and kernel tuning
cat <<EOF >> /etc/security/limits.conf
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
Alternatively, edit with sudo vim /etc/security/limits.conf
# Kernel parameters (this overwrites the k8s.conf written above and re-includes the bridge settings)
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.ip_conntrack_max = 131072
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384
EOF
Alternatively, edit with sudo vim /etc/sysctl.d/k8s.conf
sudo sysctl --system # apply
3. Deploy keepalived on all master nodes
3.1 Install the required packages and keepalived
sudo yum install -y conntrack-tools libseccomp libtool-ltdl
sudo yum install -y keepalived
3.2 Configure the master nodes
Check the network interface name (used for the interface setting below):
ip addr
master1 configuration
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 1
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
unicast_src_ip 10.3.3.41
unicast_peer {
10.3.3.48
10.3.3.115
}
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 4be37dc3b4c90194d1600c483e10ad1d
}
virtual_ipaddress {
10.3.3.88/24
}
track_script {
check_apiserver
}
notify_master "/etc/keepalived/assign_private_ip.sh 10.3.3.88 i-094f29f72ebc2d78d"
notify_backup "/etc/keepalived/unassign_private_ip.sh 10.3.3.88 i-094f29f72ebc2d78d"
}
EOF
Alternatively, edit with sudo vim /etc/keepalived/keepalived.conf
master2 configuration
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 1
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
unicast_src_ip 10.3.3.48
unicast_peer {
10.3.3.41
10.3.3.115
}
interface eth0
virtual_router_id 51
priority 90
advert_int 1
authentication {
auth_type PASS
auth_pass 4be37dc3b4c90194d1600c483e10ad1d
}
virtual_ipaddress {
10.3.3.88/24
}
track_script {
check_apiserver
}
notify_master "/etc/keepalived/assign_private_ip.sh 10.3.3.88 i-0c0ab1b8651b4aa9e"
notify_backup "/etc/keepalived/unassign_private_ip.sh 10.3.3.88 i-0c0ab1b8651b4aa9e"
}
EOF
Alternatively, edit with sudo vim /etc/keepalived/keepalived.conf
master3 configuration (note: the instance ID passed to notify_master/notify_backup below must be master3's own instance ID)
cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 1
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
unicast_src_ip 10.3.3.115
unicast_peer {
10.3.3.41
10.3.3.48
}
interface eth0
virtual_router_id 51
priority 80
advert_int 1
authentication {
auth_type PASS
auth_pass 4be37dc3b4c90194d1600c483e10ad1d
}
virtual_ipaddress {
10.3.3.88/24
}
track_script {
check_apiserver
}
notify_master "/etc/keepalived/assign_private_ip.sh 10.3.3.88 i-0c0ab1b8651b4aa9e"
notify_backup "/etc/keepalived/unassign_private_ip.sh 10.3.3.88 i-0c0ab1b8651b4aa9e"
}
EOF
Alternatively, edit with sudo vim /etc/keepalived/keepalived.conf
3.3 Health-check script (required on every master)
cat > /etc/keepalived/check_apiserver.sh <<"EOF"
#!/bin/bash
# If haproxy is not running after three checks, stop keepalived so that the VIP fails over.
err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
3.4 Configure the VIP-switching shell scripts on the master machines
Prerequisite: AWS CLI credentials (access key / secret key) are configured on all three masters; a setup sketch follows.
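The scripts below call the AWS CLI and jq on every master. A minimal sketch for putting both in place, assuming the EPEL repository provides awscli and jq (the AWS CLI v2 bundle from AWS works just as well); the credentials or an instance role need permission for ec2:DescribeInstances, ec2:AssignPrivateIpAddresses and ec2:UnassignPrivateIpAddresses:
sudo yum install -y epel-release
sudo yum install -y awscli jq
aws configure   # enter the access key ID, secret access key and the region of the instances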
- sudo vim /etc/keepalived/assign_private_ip.sh
#!/bin/bash
# Called by keepalived (notify_master): assign the VIP to this instance's ENI and add it to eth0.
# $1 = VIP, $2 = EC2 instance ID.
LOG=/etc/keepalived/keepalived.log
IP=$1
INSTANCE_ID=$2
VIP=10.3.3.88
# Look up the primary ENI of this instance
ENI=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[0].Instances[0].NetworkInterfaces[0].NetworkInterfaceId')
echo $(date) >> ${LOG}
echo "Adding IP $IP to ENI $ENI" >> ${LOG}
aws ec2 assign-private-ip-addresses --network-interface-id $ENI --private-ip-addresses $IP --allow-reassignment
echo "Assigned $IP to ENI $ENI" >> ${LOG}
sudo ip address add $VIP/24 dev eth0 >> ${LOG} 2>&1
echo "Added $VIP/24 to eth0" >> ${LOG}
- sudo vim /etc/keepalived/unassign_private_ip.sh
#!/bin/bash
# Called by keepalived (notify_backup): release the VIP from this instance's ENI and remove it from eth0.
# $1 = VIP, $2 = EC2 instance ID.
IP=$1
INSTANCE_ID=$2
VIP=10.3.3.88
LOG=/etc/keepalived/keepalived.log
ENI=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[0].Instances[0].NetworkInterfaces[0].NetworkInterfaceId')
echo $(date) >> ${LOG}
echo "Removing IP $IP from ENI $ENI" >> ${LOG}
sudo aws ec2 unassign-private-ip-addresses --network-interface-id $ENI --private-ip-addresses $IP
sudo ip address del $VIP/24 dev eth0 >> ${LOG} 2>&1
echo "Removed $VIP/24 from eth0" >> ${LOG}
# Make both scripts executable so keepalived can run them
sudo chmod +x /etc/keepalived/assign_private_ip.sh /etc/keepalived/unassign_private_ip.sh
3.5 Start and verify
Run on all three master nodes:
# Start keepalived
$ sudo systemctl start keepalived.service
# Enable it at boot
$ sudo systemctl enable keepalived.service
# Check its status
$ sudo systemctl status keepalived.service
After starting, check the NIC on master1 (the VIP should appear on eth0):
ip a s eth0
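On whichever master currently holds the MASTER state, the VIP should be visible both on eth0 and, on the AWS side, as a secondary private IP of that instance. A sketch using the EC2 API filter for private IPs:
ip addr show eth0 | grep 10.3.3.88
aws ec2 describe-network-interfaces \
  --filters "Name=addresses.private-ip-address,Values=10.3.3.88" \
  --query 'NetworkInterfaces[].Attachment.InstanceId'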
4. Deploy HAProxy
4.1 Install
sudo yum install -y haproxy
4.2 Configure
The configuration is identical on all three master nodes. It declares the three backend master API servers and binds HAProxy to port 16443, so port 16443 becomes the entry point of the cluster.
cat > /etc/haproxy/haproxy.cfg << EOF
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
log /dev/log local0
log /dev/log local1 notice
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
log global
mode http
option httplog
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
frontend monitor-in
bind *:33305
mode http
option httplog
monitor-uri /monitor
listen stats
bind *:8006
mode http
stats enable
stats hide-version
stats uri /stats
stats refresh 30s
stats realm Haproxy\ Statistics
stats auth admin:admin
#---------------------------------------------------------------------
# apiserver frontend which proxies to the masters
#---------------------------------------------------------------------
frontend apiserver
bind *:16443
mode tcp
option tcplog
default_backend apiserver
#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
option httpchk GET /healthz
option ssl-hello-chk
http-check expect status 200
mode tcp
balance roundrobin
server master1 10.3.3.41:6443 check
server master2 10.3.3.48:6443 check
server master3 10.3.3.115:6443 check
EOF
Alternatively, edit with sudo vim /etc/haproxy/haproxy.cfg
The stats page configured above is then reachable at http://<master IP>:8006/stats (login admin:admin).
4.3 Start and verify
Start HAProxy on all three master nodes:
# Enable at boot
$ sudo systemctl enable haproxy
# Start haproxy
$ sudo systemctl start haproxy
# Check its status
$ sudo systemctl status haproxy
Check the listening ports:
sudo netstat -lntup|grep haproxy
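Besides the port check, the monitor frontend and stats page defined in the config can be probed directly:
curl -i http://127.0.0.1:33305/monitor              # expect an HTTP 200 from HAProxy
curl -s -u admin:admin http://127.0.0.1:8006/stats | head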
5. Install Docker/kubeadm/kubelet on all nodes
This setup uses Docker as the container runtime (CRI), so install Docker first.
5.1 Install Docker (on AWS, refer to the Docker installation documentation)
$ sudo wget https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -O /etc/yum.repos.d/docker-ce.repo
$ sudo yum -y install docker-ce-18.06.1.ce-3.el7
$ sudo systemctl enable docker && sudo systemctl start docker
$ docker --version
Docker version 18.06.1-ce, build e68fc7a
$ cat > /etc/docker/daemon.json << EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ["https://docker.mirrors.ustc.edu.cn"],
"log-driver": "json-file",
"log-opts": {
"max-size":"500m",
"max-file":"3"
},
"storage-driver": "overlay2",
"storage-opts": [ "overlay2.override_kernel_check=true" ]
}
EOF
Alternatively, edit with sudo vim /etc/docker/daemon.json
sudo systemctl daemon-reload
sudo systemctl restart docker
sudo systemctl enable docker.service
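Confirm that Docker picked up the systemd cgroup driver set in daemon.json (the kubelet and Docker must agree on it):
sudo docker info | grep -i "cgroup driver"   # expect: Cgroup Driver: systemd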
5.2 Add the Alibaba Cloud YUM repository
$ cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
Alternatively, edit with sudo vim /etc/yum.repos.d/kubernetes.repo
5.3 Install kubeadm, kubelet and kubectl
Because releases change frequently, pin the version explicitly:
# Check whether the 1.22.5 packages are available in the repository
sudo yum list kubelet --showduplicates | sort -r
sudo yum list kubectl --showduplicates | sort -r
sudo yum list kubeadm --showduplicates | sort -r
$ sudo yum install -y kubelet-1.22.5 kubeadm-1.22.5 kubectl-1.22.5
$ sudo systemctl enable kubelet
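A quick check that the pinned versions were installed on every node:
kubeadm version -o short      # expect v1.22.5
kubelet --version             # expect Kubernetes v1.22.5
kubectl version --client --short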
6. Deploy the Kubernetes masters
6.1 Create the kubeadm configuration file
Deploy Kubernetes only after the HA components (keepalived and HAProxy) are in place. Generate a default configuration as a starting point:
kubeadm config print init-defaults > kubeadm-init.yaml
Work on the master that currently holds the VIP, here master1:
$ sudo vim kubeadm-init.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.3.3.41             # <<--- IP of this host
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  imagePullPolicy: IfNotPresent
  name: master1                           # <<--- hostname of this node (also check the taints below)
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  certSANs:
  - 10.3.3.88                             # <<--- the VIP
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: 10.3.3.88:16443     # <<--- VIP and HAProxy port
controllerManager: {}
dns: {}                                   # <<--- v1beta3 has no dns "type" field; CoreDNS is the only supported DNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.22.5
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16                # <<--- must match the pod network used by the CNI plugin deployed later
  serviceSubnet: 10.96.0.0/16             # <<--- can stay at the default
scheduler: {}
6.2 Run on the master1 node
Pre-pull the images manually (speeds up init):
sudo kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers
$ sudo kubeadm init --config kubeadm-init.yaml --upload-certs --ignore-preflight-errors=all --v=5
Alternatively, run init without a config file (the certificates then have to be copied to the other masters by hand):
sudo kubeadm init --apiserver-advertise-address=10.3.3.41 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.22.5 --service-cidr=10.96.0.0/12 --pod-network-cidr=10.244.0.0/16 --control-plane-endpoint 10.3.3.88:16443 --ignore-preflight-errors=all
Note: if init fails with "Error writing Crisocket information for the control-plane node: timed out waiting for the condition",
reset kubeadm and then run init again:
sudo kubeadm reset
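kubeadm reset does not clean up everything; a fuller cleanup before re-running init typically looks like this (a sketch following the hints kubeadm reset itself prints; adjust to your environment):
sudo kubeadm reset -f
sudo rm -rf /etc/cni/net.d $HOME/.kube/config
sudo iptables -F && sudo iptables -t nat -F && sudo iptables -t mangle -F && sudo iptables -X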
Following the instructions in the init output, configure the environment so the kubectl tool can be used:
$ mkdir -p $HOME/.kube
$ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
$ sudo chown $(id -u):$(id -g) $HOME/.kube/config
$ kubectl get nodes
$ kubectl get pods -n kube-system
6.3 Add kubectl bash completion
sudo yum -y install bash-completion && cd ~;echo "source <(kubectl completion bash)" >> .bashrc
Save the join commands printed by kubeadm init; they will be needed shortly (the endpoint below comes from a sample run, whereas the joins in sections 8 and 9 use this cluster's VIP endpoint 10.3.3.88:16443):
# You can now join any number of control-plane nodes by running the following command on each as root:
kubeadm join 10.3.3.54:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:30cd7726c990de6288d144178095122c1149ef237c3ab85f6e68ce46d96578e8 \
--control-plane --certificate-key 77dfd4ee1a71f1a531d1ee2128469a728466a9add052acd14f2a9fda04c595e8
#Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.3.3.54:6443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:30cd7726c990de6288d144178095122c1149ef237c3ab85f6e68ce46d96578e8
Check the cluster status:
kubectl get cs
kubectl get pods -n kube-system
7. Install the cluster network
Fetch the Calico manifest from the official site and run on master1:
wget https://docs.projectcalico.org/manifests/calico.yaml
Change CALICO_IPV4POOL_CIDR to the custom pod CIDR;
it must match the pod-network-cidr / podSubnet passed to kubeadm init (see the sketch below).
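In calico.yaml the relevant environment variable is commented out by default; locate it and set it to the podSubnet used above. The target result is shown here as comments rather than a scripted edit, since the indentation varies between Calico versions:
grep -n -A1 "CALICO_IPV4POOL_CIDR" calico.yaml
# after editing, the calico-node container env should contain:
#   - name: CALICO_IPV4POOL_CIDR
#     value: "10.244.0.0/16"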
Install the Calico network:
kubectl apply -f calico.yaml
Check:
kubectl get pods -n kube-system
Alternatively, install flannel in the same way (use either Calico or flannel, not both):
kubectl apply -f flannel.yaml
After the cluster is deployed, kubectl get cs reports the scheduler and controller-manager as Unhealthy.
Make the following change only after the network plugin is installed, and make it on all masters:
sudo vim /etc/kubernetes/manifests/kube-scheduler.yaml
sudo vim /etc/kubernetes/manifests/kube-controller-manager.yaml
Comment out the "- --port=0" line in both manifests.
Restart the kubelet:
sudo systemctl restart kubelet.service
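If you prefer not to edit by hand, a sed one-liner can comment the flag out (a sketch; check the manifests afterwards). The kubelet recreates the static pods automatically once the files change, after which the components should report Healthy:
sudo sed -i 's/^\([[:space:]]*\)- --port=0/\1# - --port=0/' \
  /etc/kubernetes/manifests/kube-scheduler.yaml \
  /etc/kubernetes/manifests/kube-controller-manager.yaml
kubectl get cs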
8. Join the remaining master nodes to the cluster
8.1 Join master2 and master3
On each of them, run the join command printed by kubeadm init on master1, including the --control-plane flag so the node joins as a control-plane node (the certificate key comes from the same init output):
sudo kubeadm join 10.3.3.88:16443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:30cd7726c990de6288d144178095122c1149ef237c3ab85f6e68ce46d96578e8 \
--control-plane --certificate-key 77dfd4ee1a71f1a531d1ee2128469a728466a9add052acd14f2a9fda04c595e8 --ignore-preflight-errors=all --v=5
Check the status:
kubectl get node
kubectl get pods --all-namespaces
9. Join the Kubernetes worker node
Run on node1.
To add a new node to the cluster, run the kubeadm join command from the kubeadm init output:
sudo kubeadm join 10.3.3.88:16443 --token abcdef.0123456789abcdef --discovery-token-ca-cert-hash sha256:30cd7726c990de6288d144178095122c1149ef237c3ab85f6e68ce46d96578e8 --ignore-preflight-errors=all --v=5
Check the status:
kubectl get node
kubectl get pods --all-namespaces
10. Test the Kubernetes cluster
Create a pod in the cluster and verify that it runs correctly:
$ kubectl create deployment nginx --image=nginx
$ kubectl expose deployment nginx --port=80 --type=NodePort
$ kubectl get pod,svc
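The NodePort to use (31552 in this run) is the second port shown in the PORT(S) column of the svc output; it can also be read directly:
kubectl get svc nginx -o jsonpath='{.spec.ports[0].nodePort}{"\n"}'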
Access it: curl http://10.3.3.41:31552