Installing a Highly Available Kubernetes 1.18 Cluster with kubeadm (External etcd Topology)


## The control plane and etcd are decoupled, which keeps cluster risk low: losing a single master or etcd node has little impact on the cluster, and an external etcd is easier to maintain and restore.

Cluster plan
Host IP          Roles
192.128.232.11   node01, etcd01
192.128.232.12   master01, etcd02, dns
192.128.232.13   master02, etcd03, harbor
192.128.232.15   VIP

Part 1: Base package installation
[root@master ~]# yum install -y yum-utils device-mapper-persistent-data lvm2
[root@master ~]# yum install wget net-tools telnet tree nmap sysstat lrzsz dos2unix bind-utils ntpdate -y
[root@master ~]# yum -y install bash-completion
[root@master ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@master ~]# yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

[root@master yum.repos.d]# cat > kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

Part 2: Install the master nodes
# Refresh the repo metadata; all nodes need this initialization
[root@master ~]# rm /etc/localtime -rf
[root@master ~]# ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
[root@master ~]# ntpdate 0.asia.pool.ntp.org

[root@master ~]# yum repolist && yum makecache fast

# Disable SELinux.
[root@master01 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
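
# The sed above only takes effect after a reboot; to also put SELinux into permissive mode for the running session (a common companion step, not in the original notes):
[root@master01 ~]# setenforce 0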

# Disable swap
[root@master02 ~]# swapoff -a
[root@master01 ~]# sed -i.bak '/swap/s/^/#/' /etc/fstab

# Turn off and disable the firewalld.
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld

[root@master01 ~]# ssh-keygen -f ~/.ssh/id_rsa -N ''
[root@master01 ~]# cat k8s_ip.txt 
192.128.232.12
192.128.232.13

[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
   echo ">>> ${all_ip}"
   ssh-copy-id -i ~/.ssh/id_rsa.pub root@${all_ip}
done

[root@master01 ~]#  yum install bind -y
[root@master01 ~]# cat /etc/named.conf 

options {
        listen-on port 53 { 192.128.232.12; };   # IP of the DNS host
        listen-on-v6 port 53 { ::1; };
        directory       "/var/named";
        dump-file       "/var/named/data/cache_dump.db";
        statistics-file "/var/named/data/named_stats.txt";
        memstatistics-file "/var/named/data/named_mem_stats.txt";
        recursing-file  "/var/named/data/named.recursing";
        secroots-file   "/var/named/data/named.secroots";
        allow-query     { any; };
        forwarders      { 192.128.232.2; };  # added manually; usually forward to the gateway
        dnssec-enable no;  # default is yes
        dnssec-validation no;  # default is yes
        recursion yes;

# Parameter notes — named.conf fields:
   listen-on: the listening address; bind it to the internal NIC so other machines can use it
   allow-query: which clients are allowed to query this DNS server
   forwarders: the upstream DNS server

# Check the configuration; no output means no errors
[root@master01 ~]# named-checkconf

# Append the following zone definitions to the end of the file
[root@master01 ~]# cat  /etc/named.rfc1912.zones

# Host domain
zone "host.com" IN {
        type  master;
        file  "host.com.zone";
        allow-update { 192.128.232.12; };
};
# Business domain
zone "od.com" IN {
        type  master;
        file  "od.com.zone";
        allow-update { 192.128.232.12; };
};


# Create the host-domain zone file
[root@master01 ~]# cat /var/named/host.com.zone
$ORIGIN host.com.
$TTL 600        ; 10 minutes
@       IN SOA  dns.host.com. dnsadmin.host.com. (
                                2020011201 ; serial
                                10800      ; refresh (3 hours)
                                900        ; retry (15 minutes)
                                604800     ; expire (1 week)
                                86400      ; minimum (1 day)
                                )
                        NS   dns.host.com.
$TTL 60 ; 1 minute
dns                     A    192.128.232.12
node01                  A    192.128.232.11
master01                A    192.128.232.12
master02                A    192.128.232.13
etcd01                  A    192.128.232.11
etcd02                  A    192.128.232.12
etcd03                  A    192.128.232.13


# Create the business-domain zone file
[root@master01 ~]# cat  /var/named/od.com.zone
$ORIGIN od.com.
$TTL 600        ; 10 minutes
@               IN SOA  dns.od.com. dnsadmin.od.com. (
                                2020011201 ; serial
                                10800      ; refresh (3 hours)
                                900        ; retry (15 minutes)
                                604800     ; expire (1 week)
                                86400      ; minimum (1 day)
                                )
                                NS   dns.od.com.
$TTL 60 ; 1 minute
dns                A    192.128.232.12


# Check for errors again, then start named
[root@master01 ~]# named-checkconf
[root@master01 ~]# systemctl start named
[root@master01 ~]# netstat -luntp|grep 53

# Parameter notes
   $TTL 600: default cache TTL for records in this zone, 600 seconds (10 minutes); this is a DNS record TTL, not the IP hop limit
   SOA: the start-of-authority record for the zone; the five values that follow set serial/refresh/retry/expire/minimum
   dnsadmin.od.com.: the zone admin's mail address written with a dot instead of @ (a placeholder here)
   serial: the zone serial number; bump it on every change
   $ORIGIN: the suffix appended to unqualified names below, so dns is seen as dns.od.com from outside
   netstat -luntp: show listening TCP/UDP ports and the owning processes

# On master01, check that the host domain resolves
[root@master01 ~]# dig -t A master01.host.com @192.128.232.12 +short
192.128.232.12

# Point Linux (and Windows) clients at the self-hosted DNS server by setting its IP as DNS1
[root@master01 ~]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
DNS1=192.128.232.12
[root@master01 ~]# systemctl restart network
[root@master01 ~]# ping www.baidu.com
[root@master01 ~]# ping master01.host.com


# Install the Harbor private registry on 192.128.232.13
[root@master02 ~]# cd /opt
[root@master02 ~]# mkdir src
[root@master02 ~]# cd src/
# Download it from the URL below, or reuse the same package
https://github.com/goharbor/harbor/releases
[root@master02 src]# tar xf harbor-offline-installer-v2.0.1.tgz -C /opt/
[root@master02 ~]# cd /opt/harbor/
[root@master02 harbor]# cp harbor.yml.tmpl harbor.yml

# Edit the Harbor config file; change the four settings below
[root@master02 harbor]# vi harbor.yml
hostname: harbor.od.com  
http:
  port: 180
data_volume: /data/harbor
location: /data/harbor/logs

# Install Harbor
[root@master02 harbor]# mkdir -p /data/harbor/logs
[root@master02 harbor]# yum install docker-compose -y
[root@master02 harbor]# ./install.sh

[root@master02 harbor]# docker-compose ps
[root@master02 harbor]# docker ps -a
[root@master02 harbor]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm

### Possible error:
Running yum reports: /var/run/yum.pid is already locked, another program with PID 1610 is running.
Another app is currently holding the yum lock; waiting for it to exit...
Common fix: remove the lock file with rm -f /var/run/yum.pid, then run yum again.
###

[root@master02 harbor]# vi /etc/nginx/conf.d/harbor.od.com.conf
server {
    listen       80;
    server_name  harbor.od.com;

    client_max_body_size 1000m;

    location / {
        proxy_pass http://127.0.0.1:180;
    }
}

[root@master02 harbor]# nginx -t
[root@master02 harbor]# systemctl start nginx
[root@master02 harbor]# systemctl enable nginx

# Add the record on the self-hosted DNS server (192.128.232.12):
[root@master01 ~]# vi /var/named/od.com.zone
# Remember to bump the serial number
# Append the record at the bottom
harbor             A    192.128.232.13

[root@master01 ~]# systemctl restart named
[root@master01 ~]# dig -t A harbor.od.com +short
192.128.232.13

# Open harbor.od.com in a browser and create a project named kubeadm
Username: admin
Password: Harbor12345
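
# Before images are pushed later, it is worth confirming that Docker can log in to the new registry and push into the kubeadm project. A minimal sketch, assuming harbor.od.com is already listed under insecure-registries in /etc/docker/daemon.json (that setting is added in the daemon.json step below) and using nginx:alpine purely as a test image:
[root@master02 harbor]# docker login harbor.od.com -u admin -p Harbor12345
[root@master02 harbor]# docker pull nginx:alpine
[root@master02 harbor]# docker tag nginx:alpine harbor.od.com/kubeadm/nginx:alpine
[root@master02 harbor]# docker push harbor.od.com/kubeadm/nginx:alpine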

# Install kubeadm, kubelet and kubectl at the same version (plus a matching docker-ce)
[root@master ~]# yum list kubelet --showduplicates | sort -r
[root@master ~]# yum -y install kubectl-1.18.18 kubelet-1.18.18 kubeadm-1.18.18 docker-ce-20.10.6

[root@master ~]# modprobe br_netfilter
[root@master ~]# cat > /etc/sysctl.d/kubernetes.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF

[root@master ~]# sysctl -p /etc/sysctl.d/kubernetes.conf
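
# modprobe br_netfilter does not persist across reboots; a small sketch (not in the original) to load it automatically at boot via systemd-modules-load, which is standard on CentOS 7:
[root@master ~]# cat > /etc/modules-load.d/br_netfilter.conf << EOF
br_netfilter
EOF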

[root@master ~]# systemctl enable docker && systemctl start docker && systemctl enable kubelet

# Set registry-mirrors so official Docker images pull faster
# Set cgroupdriver to systemd, matching the kubelet
[root@master ~]# mkdir /data/docker -p
[root@master ~]# cat > /etc/docker/daemon.json << EOF
{
  "registry-mirrors": ["https://4wvlvmti.mirror.aliyuncs.com"],
  "storage-driver": "overlay2",
  "insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"],
  "graph": "/data/docker",
  "exec-opts": ["native.cgroupdriver=systemd"],
  "live-restore": true
}
EOF
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart docker

[root@master ~]# yum -y install ipvsadm ipset sysstat conntrack libseccomp

[root@master ~]# hostnamectl set-hostname master01
[root@master01 ~]# cat >> /etc/hosts << EOF
192.128.232.11 node01
192.128.232.12 master01
192.128.232.13 master02
EOF

[root@master01 ~]# cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/sh
modprobe -- ip_vs
modprobe -- ip_vs_nq
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF

[root@master01 ~]# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh               12688  0 
ip_vs_wrr              12697  0 
ip_vs_rr               12600  0 
ip_vs                 145458  6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack_ipv4      15053  2 
nf_defrag_ipv4         12729  1 nf_conntrack_ipv4
nf_conntrack          139264  7 ip_vs,nf_nat,nf_nat_ipv4,xt_conntrack,nf_nat_masquerade_ipv4,nf_conntrack_netlink,nf_conntrack_ipv4
libcrc32c              12644  4 xfs,ip_vs,nf_nat,nf_conntrack

# Configure kubelet to tolerate swap (--fail-swap-on=false); first see where its config files live
[root@master ~]# rpm -ql kubelet
/etc/kubernetes/manifests
/etc/sysconfig/kubelet
/usr/bin/kubelet
/usr/lib/systemd/system/kubelet.service

[root@master ~]# cat > /etc/sysconfig/kubelet << EOF
KUBELET_EXTRA_ARGS="--fail-swap-on=false"
EOF

#Enable kubelet and docker at boot
[root@master ~]# systemctl enable kubelet docker
[root@master ~]# systemctl start docker

#Starting kubelet now is harmless: it will restart in a loop until kubeadm init/join runs
[root@master01 ~]# systemctl enable kubelet && systemctl start kubelet

# kubectl command completion
[root@master01 ~]# echo "source <(kubectl completion bash)" >> ~/.bash_profile
[root@master01 ~]# source ~/.bash_profile

#Install keepalived on the master nodes
[root@master ~]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@master ~]# yum -y install keepalived

[root@master01 ~]# cat > /etc/keepalived/keepalived.conf<<EOF 
! Configuration File for keepalived
global_defs {
   router_id 192.128.232.12
}
#Before the k8s control plane exists, port 6443 is not listening yet; comment out this vrrp_script block (and track_script below) first, otherwise keepalived will keep stopping itself.
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 6443"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP    #set BACKUP on all nodes; the highest priority temporarily acts as master
    interface eth0      #the host's NIC name
    virtual_router_id 251   #must match on all nodes
    priority 100
    advert_int 1
    nopreempt     #when the higher-priority node recovers it will not take the VIP back, avoiding flapping
    unicast_peer {        #IPs of the other keepalived nodes
        192.128.232.13
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    #comment out track_script as well before the control plane exists
    track_script {
         chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24    #the VIP
    }
}
EOF

[root@master01 ~]# cat > /etc/keepalived/check_port.sh<<'EOF'
#!/bin/bash
#Usage: referenced from keepalived.conf as
#vrrp_script check_port {                          #define a vrrp_script health check
#    script "/etc/keepalived/check_port.sh 6443"   #port to check
#    interval 2                                    #check interval in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
         PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
        echo $PORT_PROCESS
        if [ $PORT_PROCESS -eq 0 ];then
                echo "Port $CHK_PORT Is Not Used,End."
                systemctl stop keepalived
        fi
else
        echo "Check Port Cant Be Empty!"
fi
EOF
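
# keepalived executes the script directly, so it should be executable; this chmod is an addition not in the original notes (repeat it on master02 after writing its copy below):
[root@master01 ~]# chmod +x /etc/keepalived/check_port.sh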


[root@master01 kubernetes]# systemctl restart keepalived
[root@master01 kubernetes]# systemctl enable keepalived

#############################################################################
#keepalived configuration on master02
[root@master02 .ssh]# cat > /etc/keepalived/keepalived.conf <<EOF
! Configuration File for keepalived
global_defs {
   router_id 192.128.232.13
}
#Before the k8s control plane exists, port 6443 is not listening yet; comment out this vrrp_script block (and track_script below) first, otherwise keepalived will keep stopping itself.
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 6443"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP    #set BACKUP on all nodes; the highest priority temporarily acts as master
    interface eth0      #the host's NIC name
    virtual_router_id 251   #must match on all nodes
    priority 90
    advert_int 1
    nopreempt     #when the higher-priority node recovers it will not take the VIP back, avoiding flapping
    unicast_peer {        #IPs of the other keepalived nodes
        192.128.232.12
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    #comment out track_script as well before the control plane exists
    track_script {
         chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24    #the VIP
    }
}
EOF

[root@master02 ~]# cat > /etc/keepalived/check_port.sh<<'EOF'
#!/bin/bash
#Usage: referenced from keepalived.conf as
#vrrp_script check_port {                          #define a vrrp_script health check
#    script "/etc/keepalived/check_port.sh 6443"   #port to check
#    interval 2                                    #check interval in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
         PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
        echo $PORT_PROCESS
        if [ $PORT_PROCESS -eq 0 ];then
                echo "Port $CHK_PORT Is Not Used,End."
                systemctl stop keepalived
        fi
else
        echo "Check Port Cant Be Empty!"
fi
EOF

[root@master02 kubernetes]# systemctl restart keepalived
[root@master02 kubernetes]# systemctl enable keepalived
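
# A quick way to see which node currently holds the VIP (a verification step added here):
[root@master01 ~]# ip addr show eth0 | grep 192.128.232.15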


Part 3: Build the highly available etcd cluster
1) Install cfssl on node01
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -O /usr/local/bin/cfssl
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -O /usr/local/bin/cfssljson
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 -O /usr/local/bin/cfssl-certinfo
[root@node01 ~]# chmod +x /usr/local/bin/cfssl*

2) Create the CA certificate
#Create the CA config file
[root@node01 ~]# mkdir /opt/certs
[root@node01 ~]# cd /opt/certs
[root@node01 ~]# cat >ca-config.json<<EOF
{
    "signing": {
        "default": {
            "expiry": "438000h"
        },
        "profiles": {
            "server": {
                "expiry": "438000h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "server auth",
                    "client auth"
                ]
            },
            "client": {
                "expiry": "438000h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "client auth"
                ]
            },
            "peer": {
                "expiry": "438000h",
                "usages": [
                    "signing",
                    "key encipherment",
                    "server auth",
                    "client auth"
                ]
            }
        }
    }
}
EOF

server auth: the client can use this CA to verify certificates presented by the server
client auth: the server can use this CA to verify certificates presented by the client
Create the certificate signing request ca-csr.json
[root@node01 ~]# cat > ca-csr.json <<EOF
{
    "CN": "etcd",
    "key": {
        "algo": "rsa",
        "size": 2048
    }
}
EOF

##Generate the CA certificate and private key

[root@node01 ~]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca
# ls ca*
# ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem

3) Generate the client certificate
[root@node01 ~]# cat > client.json <<EOF
{
    "CN": "client",
    "key": {
        "algo": "ecdsa",
        "size": 256
    }
}
EOF

#Generate the etcd client certificate
[root@node01 ~]#  cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=client client.json  | cfssljson -bare etcd-client -

4) Generate the server and peer certificates
Create the config; the hosts field lists every IP/hostname where etcd may ever run.
[root@node01 ~]# cat > etcd.json <<EOF
{
    "CN": "etcd",
    "hosts": [ 
        "192.128.232.11",
        "192.128.232.12",
        "192.128.232.13",
        "192.128.232.14",
        "192.128.232.16",
        "192.128.232.17",
        "etcd01.host.com",
        "etcd02.host.com",
        "etcd03.host.com"
    ],
    "key": {
        "algo": "ecdsa",
        "size": 256
    },
    "names": [
        {
            "C": "CN",
            "L": "BJ",
            "ST": "BJ"
        }
    ]
}
EOF

# Generate the etcd server and peer certificates
[root@node01 ~]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=server etcd.json | cfssljson -bare etcd-server -

[root@node01 ~]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=peer etcd.json | cfssljson -bare etcd-peer -
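
# Optionally inspect the server certificate and confirm its SANs cover every etcd IP and hostname, using the cfssl-certinfo binary downloaded earlier (a quick check, not in the original):
[root@node01 ~]# cfssl-certinfo -cert etcd-server.pem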

5) Deploy the first etcd node on node01
[root@node01 ~]# cd /opt
[root@node01 ~]# wget https://storage.googleapis.com/etcd/v3.1.25/etcd-v3.1.25-linux-amd64.tar.gz
[root@node01 ~]# tar zxf etcd-v3.1.25-linux-amd64.tar.gz
[root@node01 ~]# ln -sv etcd-v3.1.25-linux-amd64 etcd
[root@node01 ~]# mkdir -p /opt/etcd/certs /opt/etcd/data /data/logs/etcd-server
[root@node01 etcd]# useradd -s /sbin/nologin -M etcd
[root@node01 certs]# cp /opt/certs/* /opt/etcd/certs/
[root@node01 etcd]# chmod 700 /opt/etcd/data
[root@node01 certs]# cd /opt/etcd
[root@node01 etcd]# vi etcd-server-startup.sh
#!/bin/sh
./etcd --name etcd01 \
       --data-dir /opt/etcd/data \
       --listen-peer-urls https://192.128.232.11:2380 \
       --listen-client-urls https://192.128.232.11:2379,http://127.0.0.1:2379 \
       --initial-advertise-peer-urls https://192.128.232.11:2380 \
       --advertise-client-urls https://192.128.232.11:2379,http://127.0.0.1:2379 \
       --initial-cluster  etcd01=https://192.128.232.11:2380,etcd02=https://192.128.232.12:2380,etcd03=https://192.128.232.13:2380 \
       --initial-cluster-state=new \
       --cert-file=/opt/etcd/certs/etcd-server.pem \
       --key-file=/opt/etcd/certs/etcd-server-key.pem \
       --peer-cert-file=/opt/etcd/certs/etcd-peer.pem \
       --peer-key-file=/opt/etcd/certs/etcd-peer-key.pem \
       --trusted-ca-file=/opt/etcd/certs/ca.pem \
       --peer-trusted-ca-file=/opt/etcd/certs/ca.pem \
       --quota-backend-bytes 8000000000 \
       --log-output stdout

[root@node01 etcd]# chmod +x etcd-server-startup.sh
[root@node01 etcd]# chown -R etcd.etcd /opt/etcd* /data/logs/etcd-server


#Install supervisor to manage the etcd process
[root@node01 etcd]# yum install supervisor -y
[root@node01 etcd]# systemctl start supervisord
[root@node01 etcd]# systemctl enable supervisord

[root@node01 etcd]# vi /etc/supervisord.d/etcd-server.ini
[program:etcd01]
command=/opt/etcd/etcd-server-startup.sh                        ; the program (relative uses PATH, can take args)
numprocs=1                                                      ; number of processes copies to start (def 1)
directory=/opt/etcd                                             ; directory to cwd to before exec (def no cwd)
autostart=true                                                  ; start at supervisord start (default: true)
autorestart=true                                                ; retstart at unexpected quit (default: true)
startsecs=30                                                    ; number of secs prog must stay running (def. 1)
startretries=3                                                  ; max # of serial start failures (default 3)
exitcodes=0,2                                                   ; 'expected' exit codes for process (default 0,2)
stopsignal=QUIT                                                 ; signal used to kill process (default TERM)
stopwaitsecs=10                                                 ; max num secs to wait b4 SIGKILL (default 10)
user=etcd                                                       ; setuid to this UNIX account to run the program
redirect_stderr=true                                            ; redirect proc stderr to stdout (default false)
stdout_logfile=/data/logs/etcd-server/etcd.stdout.log           ; stdout log path, NONE for none; default AUTO
stdout_logfile_maxbytes=64MB                                    ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=4                                        ; # of stdout logfile backups (default 10)
stdout_capture_maxbytes=1MB                                     ; number of bytes in 'capturemode' (default 0)
stdout_events_enabled=false                                     ; emit events on stdout writes (default false)


[root@node01 etcd]# supervisorctl update
[root@node01 etcd]# supervisorctl status
etcd01                           RUNNING   pid 6379, uptime 0:08:20

[root@node01 certs]# ss -lnt
State      Recv-Q Send-Q                       Local Address:Port                                      Peer Address:Port              
LISTEN     0      128                         192.128.232.11:2379                                                 *:*                  
LISTEN     0      128                              127.0.0.1:2379                                                 *:*                  
LISTEN     0      128                         192.128.232.11:2380                                                 *:*                  

6) Install the second etcd node on master01
[root@master01 ~]# tar -zxf etcd-v3.1.25-linux-amd64.tar.gz -C /opt/
[root@master01 ~]# cd /opt/
[root@master01 opt]# ln -sv etcd-v3.1.25-linux-amd64/ etcd
[root@master01 opt]# useradd -s /sbin/nologin -M etcd
[root@master01 opt]# mkdir -p /opt/etcd/certs /opt/etcd/data /data/logs/etcd-server
[root@master01 opt]# chmod 700 /opt/etcd/data
#Copy the certificates from node01 to master01
[root@master01 ~]# cd /opt/etcd/certs/
[root@master01 ~]# useradd -s /sbin/nologin -M etcd
[root@master01 certs]# scp node01:/opt/etcd/certs/* . 


[root@master01 etcd]# vi /opt/etcd/etcd-server-startup.sh
#!/bin/sh
./etcd --name etcd02 \
       --data-dir /opt/etcd/data \
       --listen-peer-urls https://192.128.232.12:2380 \
       --listen-client-urls https://192.128.232.12:2379,http://127.0.0.1:2379 \
       --initial-advertise-peer-urls https://192.128.232.12:2380 \
       --advertise-client-urls https://192.128.232.12:2379,http://127.0.0.1:2379 \
       --initial-cluster  etcd01=https://192.128.232.11:2380,etcd02=https://192.128.232.12:2380,etcd03=https://192.128.232.13:2380 \
       --initial-cluster-state=new \
       --cert-file=/opt/etcd/certs/etcd-server.pem \
       --key-file=/opt/etcd/certs/etcd-server-key.pem \
       --peer-cert-file=/opt/etcd/certs/etcd-peer.pem \
       --peer-key-file=/opt/etcd/certs/etcd-peer-key.pem \
       --trusted-ca-file=/opt/etcd/certs/ca.pem \
       --peer-trusted-ca-file=/opt/etcd/certs/ca.pem \
       --quota-backend-bytes 8000000000 \
       --log-output stdout

[root@master01 etcd]# chmod +x /opt/etcd/etcd-server-startup.sh
[root@master01 etcd]# chown -R etcd.etcd /opt/etcd* /data/logs/etcd-server
[root@master01 etcd]# yum install supervisor -y
[root@master01 etcd]# systemctl start supervisord
[root@master01 etcd]# systemctl enable supervisord

[root@master01 etcd]# vi /etc/supervisord.d/etcd-server.ini
[program:etcd02]   # change the program name on each etcd node
command=/opt/etcd/etcd-server-startup.sh                        ; the program (relative uses PATH, can take args)
numprocs=1                                                      ; number of processes copies to start (def 1)
directory=/opt/etcd                                             ; directory to cwd to before exec (def no cwd)
autostart=true                                                  ; start at supervisord start (default: true)
autorestart=true                                                ; retstart at unexpected quit (default: true)
startsecs=30                                                    ; number of secs prog must stay running (def. 1)
startretries=3                                                  ; max # of serial start failures (default 3)
exitcodes=0,2                                                   ; 'expected' exit codes for process (default 0,2)
stopsignal=QUIT                                                 ; signal used to kill process (default TERM)
stopwaitsecs=10                                                 ; max num secs to wait b4 SIGKILL (default 10)
user=etcd                                                       ; setuid to this UNIX account to run the program
redirect_stderr=true                                            ; redirect proc stderr to stdout (default false)
stdout_logfile=/data/logs/etcd-server/etcd.stdout.log           ; stdout log path, NONE for none; default AUTO
stdout_logfile_maxbytes=64MB                                    ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=4                                        ; # of stdout logfile backups (default 10)
stdout_capture_maxbytes=1MB                                     ; number of bytes in 'capturemode' (default 0)
stdout_events_enabled=false                                     ; emit events on stdout writes (default false)


[root@master01 etcd]# supervisorctl update
etcd02: added process group

[root@master01 certs]# supervisorctl status
etcd02        RUNNING   pid 16899, uptime 0:03:30

#Install the third etcd node (etcd03) on master02 in the same way

7) Verify the etcd cluster
[root@master02 etcd]# cp /opt/etcd/etcdctl /usr/local/bin/
[root@master02 certs]# cd /opt/etcd/certs
[root@master02 certs]# etcdctl --cacert=ca.pem --cert=etcd-server.pem --key=etcd-server-key.pem --endpoints="https://192.128.232.11:2379,https://192.128.232.12:2379,https://192.128.232.13:2379" endpoint status --write-out=table
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|          ENDPOINT           |        ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.128.232.11:2379 | 49df13ef248c0ccd |  3.4.10 |   20 kB |      true |      false |       207 |          9 |                  9 |        |
| https://192.128.232.12:2379 | bca178b1cfc53fdd |  3.4.10 |   25 kB |     false |      false |       207 |          9 |                  9 |        |
| https://192.128.232.13:2379 | 2192f9b1cd786234 |  3.4.10 |   20 kB |     false |      false |       207 |          9 |                  9 |        |
+-----------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+

 
[root@master02 certs]# etcdctl --cacert=ca.pem --cert=etcd-server.pem --key=etcd-server-key.pem --endpoints="https://192.128.232.11:2379" endpoint status -w table
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|          ENDPOINT           |       ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.128.232.11:2379 | 326ec117eddf797 |  3.4.10 |   20 kB |      true |      false |      1496 |          9 |                  9 |        |
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
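
# In addition to endpoint status, an endpoint health check across all three members is a useful sanity test (same certificates as above; added here):
[root@master02 certs]# etcdctl --cacert=ca.pem --cert=etcd-server.pem --key=etcd-server-key.pem --endpoints="https://192.128.232.11:2379,https://192.128.232.12:2379,https://192.128.232.13:2379" endpoint health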

8) Pre-initialization steps on master01
[root@master01 ~]# mkdir -p /etc/kubernetes/pki/etcd/
[root@master01 ~]# cp /opt/etcd/certs/ca.pem /etc/kubernetes/pki/etcd/
[root@master01 ~]# cp /opt/etcd/certs/etcd-client.pem /etc/kubernetes/pki/apiserver-etcd-client.pem
[root@master01 ~]# cp /opt/etcd/certs/etcd-client-key.pem /etc/kubernetes/pki/apiserver-etcd-client-key.pem

[root@master01 ~]# tree /etc/kubernetes/pki/
/etc/kubernetes/pki/
├── apiserver-etcd-client-key.pem
├── apiserver-etcd-client.pem
└── etcd
    └── ca.pem

1 directory, 3 files

#Download the control-plane images
[root@master01 ~]# kubeadm config images pull

[root@master01 ~]# docker images|grep k8s >k8s_images.txt 

#Retag the locally pulled images and push them to the Harbor private registry.
[root@master01 ~]# vi k8s_images_tag_push.sh
#!/bin/bash
cat /root/k8s_images.txt |while read line
do 
  ImageId=`echo $line|awk '{print $3}'`
  ImageName=`echo $line|awk -F'/' '{print $2}'|awk '{print $1}'`
  ImageVersion=`echo $line|awk '{print $2}'`
  docker tag $ImageId harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
  docker push harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
done
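
# Log in to Harbor, run the script, and confirm the retagged images exist locally before pointing kubeadm at the private repository (a short usage sketch, not in the original):
[root@master01 ~]# docker login harbor.od.com -u admin -p Harbor12345
[root@master01 ~]# bash /root/k8s_images_tag_push.sh
[root@master01 ~]# docker images | grep harbor.od.com/kubeadm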


9) Create and edit the kubeadm init configuration
[root@master01 ~]# kubeadm config print init-defaults > kubeadm-config.yaml

#Before initializing the master, strip all inline comments from kubeadm-config.yaml, otherwise init can choke on them.
[root@master01 ~]# vi kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.18.20
imageRepository: harbor.od.com/kubeadm
dns:
  type: CoreDNS
etcd:
#  local:
#    dataDir: /var/lib/etcd  # etcd runs outside the cluster, so no local data dir is needed
  external:
    endpoints:     # the external etcd cluster endpoints
    - https://192.128.232.11:2379
    - https://192.128.232.12:2379
    - https://192.128.232.13:2379
    caFile: /etc/kubernetes/pki/etcd/ca.pem
    certFile: /etc/kubernetes/pki/apiserver-etcd-client.pem
    keyFile: /etc/kubernetes/pki/apiserver-etcd-client-key.pem
apiServer:
  certSANs:    # list every kube-apiserver hostname, IP and the VIP; CIDR ranges are not allowed
  - master01
  - master02
  - master03
  - node01
  - node02
  - node03
  - 192.128.232.11
  - 192.128.232.12
  - 192.128.232.13
  - 192.128.232.14
  - 192.128.232.15
  - 192.128.232.16
  - 192.128.232.17
  - 192.128.232.18
  - 192.128.232.19
  - 192.128.232.20
controlPlaneEndpoint: "192.128.232.15:6443"  # VIP address and port
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs  # kube-proxy mode
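
# Before running init, it can help to confirm kubeadm resolves the expected image names from this config (a sanity check added here):
[root@master01 ~]# kubeadm config images list --config kubeadm-config.yaml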

################################master init #############################################
##Initialize the Kubernetes control plane on master01
[root@master01 ~]# kubeadm init --config=kubeadm-config.yaml
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:

  kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
    --discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010 \
    --control-plane 

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
    --discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010 

#Configure the kubectl client
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config

#Check componentstatuses
[root@master01 ~]# kubectl get cs
NAME                 STATUS      MESSAGE                                                                                     ERROR
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused   
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused   
etcd-1               Healthy     {"health":"true"}                                                                           
etcd-2               Healthy     {"health":"true"}                                                                           
etcd-0               Healthy     {"health":"true"}                                                                          

#This is because kube-scheduler and kube-controller-manager are started with --port=0, which disables the insecure health endpoints on 10251/10252 that kubectl get cs probes.
[root@master01 ~]# cd /etc/kubernetes/manifests/
[root@master01 manifests]# ll
total 12
-rw------- 1 root root 2789 Dec  8 17:02 kube-apiserver.yaml
-rw------- 1 root root 2592 Dec  8 17:02 kube-controller-manager.yaml
-rw------- 1 root root 1147 Dec  8 17:02 kube-scheduler.yaml

[root@master01 manifests]# vi kube-controller-manager.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0     # comment out the port flag
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --use-service-account-credentials=true
    image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10257
        scheme: HTTPS
      initialDelaySeconds: 15
      timeoutSeconds: 15
    name: kube-controller-manager
    resources:
      requests:
        cpu: 200m
    volumeMounts:
    - mountPath: /etc/ssl/certs
      name: ca-certs
      readOnly: true
    - mountPath: /etc/pki
      name: etc-pki
      readOnly: true
    - mountPath: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
      name: flexvolume-dir
    - mountPath: /etc/kubernetes/pki
      name: k8s-certs
      readOnly: true
    - mountPath: /etc/kubernetes/controller-manager.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-cluster-critical
  volumes:
  - hostPath:
      path: /etc/ssl/certs
      type: DirectoryOrCreate

[root@master01 manifests]# vi kube-scheduler.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0  # comment out the port flag
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 15
      timeoutSeconds: 15
    name: kube-scheduler
    resources:
      requests:
        cpu: 100m
    volumeMounts:
    - mountPath: /etc/kubernetes/scheduler.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-cluster-critical
  volumes:
  - hostPath:
      path: /etc/kubernetes/scheduler.conf
      type: FileOrCreate
    name: kubeconfig
status: {}
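
# The same two edits can also be applied non-interactively with sed, assuming the flag appears exactly as "- --port=0" in both manifests (a sketch; the manual edits above do the same thing):
[root@master01 manifests]# sed -i 's/^\(\s*\)- --port=0/\1#- --port=0/' kube-controller-manager.yaml kube-scheduler.yaml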

#Restart kubelet so the static pods are recreated
[root@master01 manifests]# systemctl restart kubelet

#The cluster component statuses are now healthy
[root@master01 manifests]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
etcd-2               Healthy   {"health":"true"}   
etcd-0               Healthy   {"health":"true"}   
etcd-1               Healthy   {"health":"true"}   
scheduler            Healthy   ok                  
controller-manager   Healthy   ok                  

#Check the nodes
[root@master01 manifests]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master01   NotReady    master   13m   v1.18.18

10) Install the flannel network
Deploy flannel from master01
[root@master01 ~]# wget https://raw.githubusercontent.com/coreos/flannel/2140ac876ef134e0ed5af15c65e414cf26827915/Documentation/kube-flannel.yml
[root@master01 ~]# kubectl apply -f kube-flannel.yml
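
# A quick check that the flannel pods come up and the nodes turn Ready (the label follows the flannel manifest referenced above; adjust if it differs):
[root@master01 ~]# kubectl -n kube-system get pods -l app=flannel -o wide
[root@master01 ~]# kubectl get nodes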

#Check certificate expiration dates
[root@master01 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration

W1207 10:20:28.086073    7242 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Nov 12, 2111 07:57 UTC   89y                                     no      
apiserver                  Nov 12, 2111 07:57 UTC   89y             ca                      no      
apiserver-etcd-client      Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
apiserver-kubelet-client   Nov 12, 2111 07:57 UTC   89y             ca                      no      
controller-manager.conf    Nov 12, 2111 07:57 UTC   89y                                     no      
etcd-healthcheck-client    Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
etcd-peer                  Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
etcd-server                Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
front-proxy-client         Nov 12, 2111 07:57 UTC   89y             front-proxy-ca          no      
scheduler.conf             Nov 12, 2111 07:57 UTC   89y                                     no      

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Nov 12, 2111 07:57 UTC   89y             no      
etcd-ca                 Nov 12, 2111 07:57 UTC   89y             no      
front-proxy-ca          Nov 12, 2111 07:57 UTC   89y             no  

11) Copy master01's certificates to master02; the script below differs slightly from the usual upstream version because of the external etcd files.
[root@master01 ~]# vi cert-main-master.sh
USER=root # customizable
CONTROL_PLANE_IPS="192.128.232.13"
for host in ${CONTROL_PLANE_IPS}; do
    ssh $host mkdir -p /etc/kubernetes/pki/etcd
    scp /etc/kubernetes/pki/ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.pub "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/apiserver-etcd-client-key.pem "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/apiserver-etcd-client.pem "${USER}"@$host:/etc/kubernetes/pki/
    # Quote this line if you are using external etcd
    scp /etc/kubernetes/pki/etcd/ca.pem "${USER}"@$host:/etc/kubernetes/pki/etcd/
done


[root@master01 ~]# chmod +x cert-main-master.sh 

#Copy the certificates
[root@master01 ~]# ./cert-main-master.sh 
ca.crt                                                                                                100% 1029   352.5KB/s   00:00    
ca.key                                                                                                100% 1675   117.9KB/s   00:00    
sa.key                                                                                                100% 1675     1.9MB/s   00:00    
sa.pub                                                                                                100%  451   396.2KB/s   00:00    
front-proxy-ca.crt                                                                                    100% 1038   258.7KB/s   00:00    
front-proxy-ca.key                                                                                    100% 1679    61.8KB/s   00:00    
apiserver-etcd-client-key.pem                                                                         100%  227    24.1KB/s   00:00    
apiserver-etcd-client.pem                                                                             100%  875   442.4KB/s   00:00    
ca.pem                                                                                                100% 1127    74.8KB/s   00:00    

12) Join master02 to the cluster as a control-plane node
[root@master02 ~]# kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
    --discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010 \
     --control-plane 
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
[mark-control-plane] Marking the node master02 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]

This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.


To start administering your cluster from this node, you need to run the following as a regular user:

        mkdir -p $HOME/.kube
        sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
        sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run 'kubectl get nodes' to see this node join the cluster.

13) Configure kubectl on master02
[root@master02 ~]# rm /root/.kube/ -rf
[root@master02 ~]#  mkdir -p $HOME/.kube
[root@master02 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config

14) Fail the VIP over to master02
[root@master02 kubernetes]# ip addr|grep eth0
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    inet 192.128.232.13/24 brd 192.128.232.255 scope global noprefixroute eth0
    inet 192.128.232.15/24 scope global secondary eth0
    
[root@master02 keepalived]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master01   Ready    master   57m   v1.18.18
master02   Ready    master   27m   v1.18.18

15) componentstatuses on master02 again reports scheduler and controller-manager as unhealthy
[root@master02 keepalived]# kubectl get cs
NAME                 STATUS      MESSAGE                                                                                           ERROR
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused         
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused         
etcd-2               Healthy   {"health":"true"}   
etcd-0               Healthy   {"health":"true"}   
etcd-1               Healthy   {"health":"true"}   
     
     
16) Comment out --port=0 on master02 as well
[root@master02 keepalived]# cd /etc/kubernetes/manifests/
[root@master02 manifests]# ll
total 12
-rw------- 1 root root 2789 Dec  8 17:34 kube-apiserver.yaml
-rw------- 1 root root 2592 Dec  8 17:34 kube-controller-manager.yaml
-rw------- 1 root root 1147 Dec  8 17:34 kube-scheduler.yaml
[root@master02 manifests]# vi kube-controller-manager.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --use-service-account-credentials=true
    image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10257
        scheme: HTTPS
      initialDelaySeconds: 15
      timeoutSeconds: 15
    name: kube-controller-manager
    resources:
      requests:
        cpu: 200m
    volumeMounts:
    - mountPath: /etc/ssl/certs
      name: ca-certs
      readOnly: true
    - mountPath: /etc/pki
      name: etc-pki
      readOnly: true
    - mountPath: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
      name: flexvolume-dir
    - mountPath: /etc/kubernetes/pki
      name: k8s-certs
      readOnly: true
    - mountPath: /etc/kubernetes/controller-manager.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-cluster-critical
  volumes:
  - hostPath:
      path: /etc/ssl/certs
      type: DirectoryOrCreate
      
#################################################################################################
[root@master02 manifests]# vi kube-scheduler.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 15
      timeoutSeconds: 15
    name: kube-scheduler
    resources:
      requests:
        cpu: 100m
    volumeMounts:
    - mountPath: /etc/kubernetes/scheduler.conf
      name: kubeconfig
      readOnly: true
  hostNetwork: true
  priorityClassName: system-cluster-critical
  volumes:
  - hostPath:
      path: /etc/kubernetes/scheduler.conf
      type: FileOrCreate
    name: kubeconfig
status: {}

########################################################################################################################
[root@master02 manifests]# systemctl restart kubelet
[root@master02 manifests]# kubectl get cs
NAME                 STATUS      MESSAGE                                                                                          
controller-manager   Healthy     ok                                                                                                
scheduler            Healthy     ok                                                                                                
etcd-2               Healthy     {"health":"true"}                                                                                 
etcd-1               Healthy     {"health":"true"}
etcd-0               Healthy     {"health":"true"}                                                                                

Part 4: Enable IPVS mode for kube-proxy
1) Edit config.conf in the kube-system/kube-proxy ConfigMap and set mode: "ipvs"
[root@master01 ~]# kubectl edit cm kube-proxy -n kube-system
...
    ipvs:
      excludeCIDRs: null
      minSyncPeriod: 0s
      scheduler: "nq"     # set the IPVS scheduling algorithm to nq
      strictARP: false
      syncPeriod: 30s
    kind: KubeProxyConfiguration
    metricsBindAddress: 127.0.0.1:10249
    mode: "ipvs"     # switch the proxy mode to ipvs
    nodePortAddresses: null
    oomScoreAdj: -999
    portRange: ""
    resourceContainer: /kube-proxy
...


2) The existing kube-proxy pods can simply be deleted; Kubernetes recreates them automatically with the new config.
[root@master01 ~]# kubectl get pods -n kube-system|grep proxy
kube-proxy-8kgdr                 1/1     Running   0          79m
kube-proxy-dq8zz                 1/1     Running   0          24m
kube-proxy-gdtqx                 1/1     Running   0          155m

3) Delete the kube-proxy pods in bulk
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'

Because the kube-proxy configuration was changed via the ConfigMap, any nodes added later will use IPVS mode from the start.

4) Check the logs
[root@master01 ~] kubectl get pods -n kube-system|grep proxy
kube-proxy-84mgz                 1/1     Running   0          16s
kube-proxy-r8sxj                 1/1     Running   0          15s
kube-proxy-wjdmp                 1/1     Running   0          12s

#The log prints "Using ipvs Proxier", confirming that IPVS mode is active.
[root@master01 ~]# kubectl logs -f kube-proxy-84mgz -n kube-system
I0827 04:59:16.916862       1 server_others.go:170] Using ipvs Proxier.
W0827 04:59:16.917140       1 proxier.go:401] IPVS scheduler not specified, use rr by default
I0827 04:59:16.917748       1 server.go:534] Version: v1.15.3
I0827 04:59:16.927407       1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0827 04:59:16.929217       1 config.go:187] Starting service config controller
I0827 04:59:16.929236       1 controller_utils.go:1029] Waiting for caches to sync for service config controller
I0827 04:59:16.929561       1 config.go:96] Starting endpoints config controller
I0827 04:59:16.929577       1 controller_utils.go:1029] Waiting for caches to sync for endpoints config controller
I0827 04:59:17.029899       1 controller_utils.go:1036] Caches are synced for endpoints config controller
I0827 04:59:17.029954       1 controller_utils.go:1036] Caches are synced for service config controller

5) Verify with ipvsadm: the Services created so far now have LVS virtual servers.

[root@master01 ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  10.96.0.1:443 nq
  -> 1.1.1.101:6443               Masq    1      0          0
TCP  10.96.0.10:53 nq
  -> 10.244.0.2:53                Masq    1      0          0
  -> 10.244.2.8:53                Masq    1      0          0
TCP  10.96.0.10:9153 nq
  -> 10.244.0.2:9153              Masq    1      0          0
  -> 10.244.2.8:9153              Masq    1      0          0
UDP  10.96.0.10:53 nq
  -> 10.244.0.2:53                Masq    1      0          0
  -> 10.244.2.8:53                Masq    1      0          0

 
Part 5: Join node01 as a worker; unlike a master, do not pass "--control-plane"
##If the join token has been lost or has expired:
#Case 1: the token has expired
1) Get a token

#List tokens that have not yet expired
[root@master01 ~]# kubeadm token list  | awk -F" " '{print $1}' |tail -n 1

#If nothing is printed (all tokens expired), create a new token and print the full join command
[root@master01 ~]# kubeadm token create --print-join-command  

#Case 2: the token is still valid
1) List the tokens
[root@master01 ~]# kubeadm token list  | awk -F" " '{print $1}' |tail -n 1
jd0u21.4ydhozszk7255xxb

2) Get the SHA-256 hash of the CA public key
[root@master01 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed  's/^ .* //'
(stdin)= d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc

3) Join node01 to the cluster (run on node01)
[root@node01 ~]# kubeadm join 192.128.232.15:6443 --token jd0u21.4ydhozszk7255xxb \
    --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc 
    
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
W1208 18:22:47.238149    2116 join.go:346] [preflight] WARNING: JoinControlPane.controlPlane settings will be ignored when control-plane flag is not set.
[preflight] Running pre-flight checks
        [WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.6. Latest validated version: 19.03
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
 
4) On master01, confirm that node01 has joined the cluster
[root@master01 ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master01   Ready    master   79m   v1.18.18
master02   Ready    master   49m   v1.18.18
node01     Ready    <none>   43s   v1.18.18
 
5) Test the cluster by deploying a DaemonSet
[root@master02 ~]# cat nginx-ds.yaml 
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nginx-ds
spec:
  selector:
    matchLabels:
      app: nginx-ds
  template:
    metadata:
      labels:
        app: nginx-ds
    spec:
      containers:
      - name: my-nginx
        image: nginx:alpine
        ports:
        - containerPort: 80
        
[root@master02 ~]# kubectl apply -f nginx-ds.yaml 
daemonset.apps/nginx-ds created
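
# To verify the DaemonSet pod actually serves traffic, a small sketch that curls the pod IP from a master (the jsonpath expression pulls the first pod's IP):
[root@master02 ~]# POD_IP=$(kubectl get pod -l app=nginx-ds -o jsonpath='{.items[0].status.podIP}')
[root@master02 ~]# curl -sI http://$POD_IP | head -n 1    # expect an HTTP/1.1 200 OK status line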

6) Check whether the masters carry taints
[root@master02 ~]# kubectl describe node master01|grep Taint
Taints:             node-role.kubernetes.io/master:NoSchedule
 
[root@master02 ~]# kubectl describe node master02|grep Taint
Taints:             node-role.kubernetes.io/master:NoSchedule

7) The pod is scheduled onto node01, because both masters carry the NoSchedule taint.
[root@master02 ~]# kubectl get pod -o wide
NAME             READY   STATUS    RESTARTS   AGE   IP           NODE     NOMINATED NODE   READINESS GATES
nginx-ds-pkqkj   1/1     Running   0          55s   10.244.2.2   node01   <none>           <none>

8) Back up the local images
[root@node01 ~]# docker save $(docker images | grep -vE 'REPOSITORY | redis' | awk 'BEGIN{OFS=":";ORS=" "}{print $1,$2}') -o export.tar

9) Load the archive on another node
[root@node01 ~]# docker load -i  export.tar

10) Remove a node
On a master, first drain it: kubectl drain node01 --delete-local-data --force --ignore-daemonsets (the sample output below was captured while draining a node named node-3)
[root@master01]# kubectl drain node-3 --delete-local-data --force --ignore-daemonsets
node/node-3 cordoned
WARNING: ignoring DaemonSet-managed Pods: kube-system/kube-flannel-ds-amd64-wmqnf, kube-system/kube-proxy-g456v
evicting pod "coredns-5c98db65d4-6lpr2"
evicting pod "nginx-deploy-7689897d8d-kfc7v"
pod/nginx-deploy-7689897d8d-kfc7v evicted
pod/coredns-5c98db65d4-6lpr2 evicted
node/node-3 evicted

[root@master01 ~]# kubectl get nodes
NAME       STATUS                   ROLES    AGE   VERSION
master01   Ready                    master   79m   v1.18.18
master02   Ready                    master   49m   v1.18.18
node01   Ready,SchedulingDisabled   <none>   50m   v1.18.18

[root@master01]# kubectl delete node node01

[root@master01]# kubectl get nodes

Then run on node01:
[root@node01 ~]# kubeadm reset
[root@node01 ~]# ifconfig cni0 down
[root@node01 ~]# ip link delete cni0
[root@node01 ~]# ifconfig flannel.1 down
[root@node01 ~]# ip link delete flannel.1
[root@node01 ~]# rm -rf /var/lib/cni/
[root@node01 ~]# systemctl stop kubelet
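
# kubeadm reset does not clean up iptables or IPVS rules; the usual follow-up cleanup (as hinted by kubeadm itself) looks like this:
[root@node01 ~]# iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
[root@node01 ~]# ipvsadm --clear
[root@node01 ~]# rm -rf /etc/cni/net.d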
 

 
 
 
 
 
 
 
 
