###################### Installing a k8s cluster from a private registry ################################

# Push the images downloaded on the master to the self-hosted Harbor; the kubeadm project must be created in the Harbor registry beforehand.
# Install k8s with kubeadm
k8s cluster host layout
192.128.232.9 nginx01,keepalived01
192.128.232.10 nginx02,keepalived02
192.128.232.11 master01,dns
192.128.232.12 master02
192.128.232.13 master03
192.128.232.14 node01
192.128.232.16 node02
192.128.232.17 node03
192.128.232.18 devops,harbor
192.128.232.15 VIP address

1: Initial k8s host setup; update the repo sources. All nodes need this initialization.
[root@master ~]# yum install -y yum-utils device-mapper-persistent-data lvm2
[root@master ~]# yum install wget net-tools telnet tree nmap sysstat lrzsz dos2unix bind-utils ntpdate -y
[root@master ~]# yum -y install bash-completion
[root@master ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@master ~]# yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

[root@master yum.repos.d]# cat > kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF


[root@master ~]# rm /etc/localtime -rf
[root@master ~]# ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
[root@master ~]# ntpdate 0.asia.pool.ntp.org

[root@master ~]# yum repolist && yum makecache fast

# Disable SELinux (setenforce takes effect immediately; the sed change is permanent after reboot).
[root@master01 ~]# setenforce 0
[root@master01 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

# Disable swap
[root@master01 ~]# swapoff -a
[root@master01 ~]# sed -i.bak '/swap/s/^/#/' /etc/fstab

# Stop and disable firewalld.
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld

[root@master01 ~]# ssh-keygen -f ~/.ssh/id_rsa -N ''
[root@master01 ~]# cat k8s_ip.txt 
192.128.232.9
192.128.232.10
192.128.232.11
192.128.232.12
192.128.232.13
192.128.232.14
192.128.232.16
192.128.232.17
192.128.232.18

[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
   echo ">>> ${all_ip}"
   ssh-copy-id -i ~/.ssh/id_rsa.pub root@${all_ip}
done
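
# A quick sanity check (a small sketch reusing the same k8s_ip.txt) to confirm passwordless SSH works on every host before continuing:
[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
   # should print each remote hostname without asking for a password
   ssh -o BatchMode=yes root@${all_ip} hostname
done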

2: Install the DNS service
[root@master01 ~]#  yum install bind -y
[root@master01 ~]# cat /etc/named.conf 

options {
        listen-on port 53 { 192.128.232.11; };   # IP of the DNS host
        listen-on-v6 port 53 { ::1; };
        directory       "/var/named";
        dump-file       "/var/named/data/cache_dump.db";
        statistics-file "/var/named/data/named_stats.txt";
        memstatistics-file "/var/named/data/named_mem_stats.txt";
        recursing-file  "/var/named/data/named.recursing";
        secroots-file   "/var/named/data/named.secroots";
        allow-query     { any; };
        forwarders      { 192.128.232.2; };  # added; forward unresolved queries upstream, usually to the gateway
        dnssec-enable no;  # originally yes
        dnssec-validation no;  # originally yes
        recursion yes;

# Parameter notes
Notes on named.conf:
   listen-on: the listening address; bind it to the internal network IP so other machines can use this server
   allow-query: which clients are allowed to query this DNS server
   forwarders: the upstream DNS server

# Check the configuration; no output means no errors
[root@master01 ~]# named-checkconf

# Append the following zone definitions to the end of the file
[root@master01 ~]# cat >> /etc/named.rfc1912.zones <<EOF

# host domain
zone "host.com" IN {
        type  master;
        file  "host.com.zone";
        allow-update { 192.128.232.11; };
};
# business domain
zone "od.com" IN {
        type  master;
        file  "od.com.zone";
        allow-update { 192.128.232.11; };
};
EOF

# Create the host-domain zone file (quote the heredoc delimiter so $ORIGIN/$TTL are not expanded by the shell)
[root@master01 ~]# cat >> /var/named/host.com.zone <<'EOF'
$ORIGIN host.com.
$TTL 600        ; 10 minutes
@       IN SOA  dns.host.com. dnsadmin.host.com. (
                                2020011201 ; serial
                                10800      ; refresh (3 hours)
                                900        ; retry (15 minutes)
                                604800     ; expire (1 week)
                                86400      ; minimum (1 day)
                                )
                        NS   dns.host.com.
$TTL 60 ; 1 minute
dns                     A    192.128.232.11
nginx01                 A    192.128.232.9
nginx02                 A    192.128.232.10
master01                A    192.128.232.11
master02                A    192.128.232.12
master03                A    192.128.232.13
node01                  A    192.128.232.14
node02                  A    192.128.232.16
node03                  A    192.128.232.17
EOF

# Create the business-domain zone file (again with a quoted heredoc delimiter)
[root@master01 ~]# cat > /var/named/od.com.zone <<'EOF'
$ORIGIN od.com.
$TTL 600        ; 10 minutes
@               IN SOA  dns.od.com. dnsadmin.od.com. (
                                2020011201 ; serial
                                10800      ; refresh (3 hours)
                                900        ; retry (15 minutes)
                                604800     ; expire (1 week)
                                86400      ; minimum (1 day)
                                )
                                NS   dns.od.com.
$TTL 60 ; 1 minute
dns                A    192.128.232.11
EOF

# Check again for errors
[root@master01 ~]# named-checkconf
[root@master01 ~]# systemctl start named
[root@master01 ~]# netstat -luntp|grep 53
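
# Optional check: resolve an external name through the new server to confirm the forwarder works
# (assumes this host has outbound network access; the domain below is just an example)
[root@master01 ~]# dig www.baidu.com @192.128.232.11 +short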

# Parameter notes
   $TTL 600: the default time-to-live of records in this zone, i.e. how long resolvers may cache them
   10 minutes: 600 seconds equals 10 minutes
   SOA: the start-of-authority record for the zone; the five values after it set the zone's refresh/retry/expire timing
   dnsadmin.od.com.: the zone contact mailbox written in dotted form (a placeholder here)
   serial: the zone's version number; bump it on every change
   $ORIGIN: the suffix automatically appended to unqualified names below, so dns is seen externally as dns.od.com
   netstat -luntp: shows listening TCP/UDP ports and the owning processes

# On master01, check that the host domain resolves
[root@master01 ~]# dig -t A master01.host.com @192.128.232.11 +short
192.128.232.11

# Point the Linux (and Windows) clients at this server by setting the interface DNS to the self-hosted DNS server
[root@master01 ~]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
DNS1=192.128.232.11    # note: the key must be DNS1; a plain DNS entry may not take effect in the tests below


[root@master01 ~]# systemctl restart network

nameserver 192.128.232.11 is then generated automatically in /etc/resolv.conf:

[root@master01 ~]# cat /etc/resolv.conf
# Generated by NetworkManager
nameserver 192.128.232.11


[root@master01 ~]# ping www.baidu.com
[root@master01 ~]# ping master01.host.com

3: Install the Harbor private registry
# Harbor is installed on 192.128.232.18
[root@devops ~]# cd /opt
[root@devops ~]# mkdir src
[root@devops ~]# cd src/
# Download from the address below, or use the same offline package used here
https://github.com/goharbor/harbor/releases
[root@devops src]# tar xf harbor-offline-installer-v2.0.1.tgz -C /opt/
[root@devops ~]# cd /opt/harbor/
[root@devops harbor]# cp harbor.yml.tmpl harbor.yml

# Edit the Harbor config file; change the following four options
[root@devops harbor]# vi harbor.yml
hostname: harbor.od.com  
http:
  port: 180
data_volume: /data/harbor
location: /data/harbor/logs

# Install Harbor
[root@devops harbor]# mkdir -p /data/harbor/logs
[root@devops harbor]# yum install docker-compose -y
[root@devops harbor]# ./install.sh

[root@devops harbor]# docker-compose ps
[root@devops harbor]# docker ps -a
[root@devops harbor]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@devops harbor]# yum -y install nginx

### Related error:
During yum: /var/run/yum.pid is locked; another program with PID 1610 is running.
Another application is holding the yum lock; waiting for it to exit...
Common fix: remove the lock file with rm -f /var/run/yum.pid, then run yum again.
###

[root@devops harbor]# vi /etc/nginx/conf.d/harbor.od.com.conf
server {
    listen       80;
    server_name  harbor.od.com;
    client_max_body_size 1000m;
    location / {
        proxy_pass http://127.0.0.1:180;
    }
}

[root@devops harbor]# nginx -t
[root@devops harbor]# systemctl start nginx
[root@devops harbor]# systemctl enable nginx

# Add the domain record on the self-hosted DNS server (192.128.232.11):
[root@master01 ~]# vi /var/named/od.com.zone
# remember to bump the serial number by one
# append the record at the bottom
harbor             A    192.128.232.18

[root@master01 ~]# systemctl restart named
[root@master01 ~]# dig -t A harbor.od.com +short
192.128.232.18

# Open harbor.od.com in a browser and create a project named kubeadm
Username: admin
Password: Harbor12345
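
# Instead of clicking through the UI, the kubeadm project can also be created via Harbor's REST API.
# This is only a sketch: it assumes Harbor 2.x exposes /api/v2.0/projects behind the nginx proxy above and uses the default admin credentials.
[root@master01 ~]# curl -u admin:Harbor12345 -X POST -H "Content-Type: application/json" \
    http://harbor.od.com/api/v2.0/projects \
    -d '{"project_name": "kubeadm", "metadata": {"public": "true"}}'
# confirm the project exists
[root@master01 ~]# curl -u admin:Harbor12345 "http://harbor.od.com/api/v2.0/projects?name=kubeadm"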

4: Install the Docker environment on every cluster node
# Install kubeadm, kubelet, kubectl and docker-ce at pinned versions, the same on every node
[root@master ~]# yum list kubelet --showduplicates | sort -r
[root@master ~]# yum -y install kubectl-1.18.18 kubelet-1.18.18 kubeadm-1.18.18 docker-ce-20.10.6

[root@master ~]# modprobe br_netfilter
[root@master ~]# cat > /etc/sysctl.d/kubernetes.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF

[root@master ~]# sysctl -p /etc/sysctl.d/kubernetes.conf

[root@master ~]# systemctl enable docker && systemctl start docker && systemctl enable kubelet

// Set registry-mirrors to speed up pulling official Docker images
// Set the cgroup driver to systemd so it matches the kubelet
[root@master ~]# mkdir /data/docker -p
[root@master ~]# cat > /etc/docker/daemon.json << EOF
{
  "registry-mirrors": ["https://4wvlvmti.mirror.aliyuncs.com"],
  "storage-driver": "overlay2",
  "insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"],
  "graph": "/data/docker",
  "exec-opts": ["native.cgroupdriver=systemd"],
  "live-restore": true
}
EOF
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart docker
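
# Optional: confirm the daemon picked up the new settings (the grep patterns below assume the standard docker info output)
[root@master ~]# docker info | grep -iE "storage driver|cgroup driver"
[root@master ~]# docker info | grep -iA3 "insecure registries"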

[root@master ~]# yum -y install ipvsadm ipset sysstat conntrack libseccomp

[root@master ~]# hostnamectl set-hostname master01
[root@master01 ~]# cat >> /etc/hosts << EOF
192.128.232.9  nginx01
192.128.232.10 nginx02
192.128.232.11 master01
192.128.232.12 master02
192.128.232.13 master03
192.128.232.14 node01
192.128.232.16 node02
192.128.232.17 node03
EOF

[root@master01 ~]# cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/sh
modprobe -- ip_vs
modprobe -- ip_vs_nq
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF

[root@master01 ~]# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh               12688  0 
ip_vs_wrr              12697  0 
ip_vs_rr               12600  0 
ip_vs                 145458  6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack_ipv4      15053  2 
nf_defrag_ipv4         12729  1 nf_conntrack_ipv4
nf_conntrack          139264  7 ip_vs,nf_nat,nf_nat_ipv4,xt_conntrack,nf_nat_masquerade_ipv4,nf_conntrack_netlink,nf_conntrack_ipv4
libcrc32c              12644  4 xfs,ip_vs,nf_nat,nf_conntrack

# Configure kubelet to tolerate disabled swap (fail-swap-on=false below); first see which files the kubelet package owns
[root@master ~]# rpm -ql kubelet
/etc/kubernetes/manifests
/etc/sysconfig/kubelet
/usr/bin/kubelet
/usr/lib/systemd/system/kubelet.service

[root@master ~]# cat > /etc/sysconfig/kubelet << EOF
KUBELET_EXTRA_ARGS="--fail-swap-on=false"
EOF

# Enable kubelet on boot; it will keep restarting until kubeadm init/join runs, which is expected
[root@master ~]# systemctl enable kubelet docker
[root@master ~]# systemctl start docker

[root@master01 ~]# systemctl enable kubelet && systemctl start kubelet

# kubectl command completion
[root@master01 ~]# echo "source <(kubectl completion bash)" >> ~/.bash_profile
[root@master01 ~]# source ~/.bash_profile

5: Install keepalived and nginx on the two nginx hosts
[root@nginx01 ~]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@nginx01 ~]# yum -y install nginx keepalived

[root@nginx01 ~]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   router_id 192.128.232.9
}
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 80"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP    # set BACKUP on all nodes; the higher-priority node acts as master
    interface eth0      # NIC name on this host
    virtual_router_id 251   # must be identical on both nodes
    priority 100
    advert_int 1
    nopreempt     # when the higher-priority node recovers, do not take the VIP back, to avoid flapping
    unicast_peer {        # IPs of the other keepalived node(s)
        192.128.232.10
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    track_script {
         chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24    # VIP address
    }
}


[root@nginx01 ~]# vi /etc/keepalived/check_port.sh
#!/bin/bash
# Usage: referenced from the keepalived config as
#vrrp_script check_port {                          # define a vrrp_script health check
#    script "/etc/keepalived/check_port.sh 6443"   # port to check
#    interval 2                                    # how often to run the check, in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
        PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
        echo $PORT_PROCESS
        if [ $PORT_PROCESS -eq 0 ];then
                echo "Port $CHK_PORT Is Not Used,End."
                # the port is down: stop keepalived so the VIP fails over to the peer
                systemctl stop keepalived
        fi
else
        echo "Check Port Cannot Be Empty!"
fi


[root@nginx01 kubernetes]# systemctl restart keepalived
[root@nginx01 kubernetes]# systemctl enable keepalived

# Configure nginx to proxy the kube-apiserver
[root@nginx01 kubernetes]# cat >> /etc/nginx/nginx.conf<<EOF
stream {
    upstream kube-apiserver {
        server 192.128.232.11:6443     max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443     max_fails=3 fail_timeout=30s;
        server 192.128.232.13:6443     max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
EOF

[root@nginx01 kubernetes]# nginx -t
[root@nginx01 kubernetes]# systemctl start nginx
[root@nginx01 kubernetes]# systemctl enable nginx

#############################################################################
# keepalived configuration on nginx02
[root@nginx02 .ssh]# vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   router_id 192.128.232.10
}
vrrp_script chk_nginx {
    script "/etc/keepalived/check_port.sh 80"
    interval 2
    weight -20
}
vrrp_instance VI_1 {
    state BACKUP    # set BACKUP on all nodes; the higher-priority node acts as master
    interface eth0      # NIC name on this host
    virtual_router_id 251   # must be identical on both nodes
    priority 90
    advert_int 1
    nopreempt     # when the higher-priority node recovers, do not take the VIP back, to avoid flapping
    unicast_peer {        # IPs of the other keepalived node(s)
        192.128.232.9
    }
    authentication {
        auth_type PASS
        auth_pass 11111111
    }
    track_script {
         chk_nginx
    }
    virtual_ipaddress {
        192.128.232.15/24    # VIP address
    }
}


[root@nginx02 ~]# vi /etc/keepalived/check_port.sh
#!/bin/bash
# Usage: referenced from the keepalived config as
#vrrp_script check_port {                          # define a vrrp_script health check
#    script "/etc/keepalived/check_port.sh 6443"   # port to check
#    interval 2                                    # how often to run the check, in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
        PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
        echo $PORT_PROCESS
        if [ $PORT_PROCESS -eq 0 ];then
                echo "Port $CHK_PORT Is Not Used,End."
                # the port is down: stop keepalived so the VIP fails over to the peer
                systemctl stop keepalived
        fi
else
        echo "Check Port Cannot Be Empty!"
fi


[root@nginx02 kubernetes]# systemctl restart keepalived
[root@nginx02 kubernetes]# systemctl enable keepalived

# Configure nginx to proxy the kube-apiserver
[root@nginx02 kubernetes]# cat >> /etc/nginx/nginx.conf <<EOF
stream {
    upstream kube-apiserver {
        server 192.128.232.11:6443     max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443     max_fails=3 fail_timeout=30s;
        server 192.128.232.13:6443     max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
EOF

[root@nginx02 kubernetes]# nginx -t
[root@nginx02 kubernetes]# systemctl start nginx
[root@nginx02 kubernetes]# systemctl enable nginx
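
# A minimal VIP check and failover drill, assuming eth0 and the VIP above. Because both nodes use nopreempt,
# the VIP stays on nginx02 after the test until keepalived is restarted on nginx01.
[root@nginx01 ~]# ip addr show eth0 | grep 192.128.232.15       # the higher-priority node should hold the VIP
[root@nginx01 ~]# systemctl stop nginx                          # port 80 goes down, check_port.sh stops keepalived
[root@nginx02 ~]# ip addr show eth0 | grep 192.128.232.15       # the VIP should now appear here
[root@nginx01 ~]# systemctl start nginx && systemctl start keepalived   # restore nginx01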

###################################################################################################################################

# Before deploying k8s, rebuild kubeadm: the certificates kubeadm issues are only valid for 1 year by default

6: Rebuild kubeadm to extend the k8s certificate validity
Get the source code
Visit: https://github.com/kubernetes...
# Or download the matching release from the official site; the kubernetes source version must match the installed kubeadm version, otherwise the replaced binary will cause problems.

https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.18.18.zip

Download the matching version:
[root@master01 k8s]# mkdir /data/k8s/
[root@master01 k8s]# cd /data/k8s/
[root@master01 k8s]# wget https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.18.18.zip
[root@master01 k8s]# unzip v1.18.18.zip
[root@master01 k8s]# ln -sv kubernetes-1.18.18 kubernetes
 

####### Patch the certificate validity in the source

### Change the CA validity to 90 years (default is 10 years)
[root@master01 k8s]# cd /data/k8s/kubernetes
[root@master01 k8s]# vi ./staging/src/k8s.io/client-go/util/cert/cert.go

// In this function the default is NotAfter: now.Add(duration365d * 10).UTC()
// i.e. a 10-year CA validity; change it to 90 years
// search for it in vi with /NotAfter
func NewSelfSignedCACert(cfg Config, key crypto.Signer) (*x509.Certificate, error) {
        now := time.Now()
        tmpl := x509.Certificate{
                SerialNumber: new(big.Int).SetInt64(0),
                Subject: pkix.Name{
                        CommonName:   cfg.CommonName,
                        Organization: cfg.Organization,
                },
                NotBefore:             now.UTC(),
                // NotAfter:              now.Add(duration365d * 10).UTC(),
                NotAfter:              now.Add(duration365d * 90).UTC(),     // changed: CA valid for 90 years
                KeyUsage:              x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
                BasicConstraintsValid: true,
                IsCA:                  true,
        }

        certDERBytes, err := x509.CreateCertificate(cryptorand.Reader, &tmpl, &tmpl, key.Public(), key)
        if err != nil {
                return nil, err
        }
        return x509.ParseCertificate(certDERBytes)
}

### Change the signed-certificate validity to 90 years (default is 1 year)
[root@master01 k8s]# cd /data/k8s/kubernetes
[root@master01 k8s]# vi ./cmd/kubeadm/app/constants/constants.go


// CertificateValidity is the constant to change; multiply it by 90
const (
        // KubernetesDir is the directory Kubernetes owns for storing various configuration files
        KubernetesDir = "/etc/kubernetes"
        // ManifestsSubDirName defines directory name to store manifests
        ManifestsSubDirName = "manifests"
        // TempDirForKubeadm defines temporary directory for kubeadm
        // should be joined with KubernetesDir.
        TempDirForKubeadm = "tmp"

        // CertificateValidity defines the validity for all the signed certificates generated by kubeadm
        // CertificateValidity = time.Hour * 24 * 365
        CertificateValidity = time.Hour * 24 * 365 * 90    // changed: certificates valid for 90 years

        // CACertAndKeyBaseName defines certificate authority base name
        CACertAndKeyBaseName = "ca"
        // CACertName defines certificate name
        CACertName = "ca.crt"
        // CAKeyName defines certificate name
        CAKeyName = "ca.key"
        
        
The source is patched; next, build kubeadm.

3. Build
Prepare the build dependencies; a Go toolchain is required.
CentOS:
[root@master01 kubernetes]# yum install gcc make rsync jq -y

Go environment
Check the kube-cross TAG version:
[root@master01 kubernetes]# cd /data/k8s/kubernetes
[root@master01 kubernetes]# cat ./build/build-image/cross/VERSION
v1.15.10-legacy-1

Download and install the matching Go version.
Official download page: https://golang.google.cn/dl/

[root@master01 kubernetes]# cd /usr/local/
[root@master01 local]# wget https://golang.google.cn/dl/go1.15.10.linux-amd64.tar.gz
[root@master01 local]# tar -zxf go1.15.10.linux-amd64.tar.gz 

# Configure the Go environment (quote the heredoc delimiter so $PATH and friends are written literally)
[root@master01 local]# cat >> /etc/profile <<'EOF'
#go setting
export GOROOT=/usr/local/go
export PATH=$PATH:$GOROOT/bin
export GOPATH=/root/go
export PATH=$PATH:$GOPATH/bin
EOF

# Apply the profile
[root@master01 local]# source /etc/profile
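
# Verify the toolchain; the version should match the kube-cross tag above
[root@master01 local]# go version
# expected output: go version go1.15.10 linux/amd64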

# Build kubeadm; building kubeadm alone is enough here.
# To rebuild everything, use: make cross
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubeadm GOFLAGS=-v

# Build kubelet
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubelet GOFLAGS=-v

# Build kubectl
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# make all WHAT=cmd/kubectl GOFLAGS=-v

# Replace the installed kubeadm
[root@master01 local]# mv /usr/bin/kubeadm /usr/bin/kubeadm_backup
[root@master01 local]# cd /data/k8s/kubernetes
[root@master01 local]# cp _output/local/bin/linux/amd64/kubeadm /usr/bin/kubeadm
[root@master01 local]# chmod +x /usr/bin/kubeadm


# Check the kubeadm version
[root@master01 ~]# kubeadm version
kubeadm version: &version.Info{Major:"1", Minor:"18", GitVersion:"v1.18.18", 

7: Pull the required images, retag them and push them to the private Harbor registry
If k8s.gcr.io is unreachable, the masters can pull their images from Harbor instead; the steps below install the k8s cluster from Harbor.
# List the images kubeadm needs
[root@master01 ~]# kubeadm config images list
I1203 15:19:52.696209   21678 version.go:255] remote version is much newer: v1.22.4; falling back to: stable-1.18
W1203 15:19:53.163851   21678 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
k8s.gcr.io/kube-apiserver:v1.18.20
k8s.gcr.io/kube-controller-manager:v1.18.20
k8s.gcr.io/kube-scheduler:v1.18.20
k8s.gcr.io/kube-proxy:v1.18.20
k8s.gcr.io/pause:3.2
k8s.gcr.io/etcd:3.4.3-0
k8s.gcr.io/coredns:1.6.7

# Pull the images
[root@master01 ~]# kubeadm config images pull

[root@master01 ~]# docker images|grep k8s >k8s_images.txt 

[root@master01 ~]# vi k8s_images_tag_push.sh
#!/bin/bash
# Retag every k8s image listed in k8s_images.txt and push it to the kubeadm project in Harbor
cat /root/k8s_images.txt |while read line
do 
  ImageId=`echo $line|awk '{print $3}'`                          # image ID (3rd column of docker images)
  ImageName=`echo $line|awk -F'/' '{print $2}'|awk '{print $1}'` # image name without the k8s.gcr.io/ prefix
  ImageVersion=`echo $line|awk '{print $2}'`                     # image tag
  docker tag $ImageId harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
  docker push harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
done


[root@master01 ~]# chmod +x k8s_images_tag_push.sh 
# Log in to harbor.od.com. This Harbor instance has no TLS, but "insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"]
# was added to /etc/docker/daemon.json when Docker was configured, so pushing over plain HTTP works without HTTPS.

# Log in to the Harbor registry
[root@master01 ~]# docker login harbor.od.com
Username: admin
Password: 
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded

# Push the images, then check in Harbor that they are all there.
[root@master01 ~]# ./k8s_images_tag_push.sh 


# Create the init config and point image pulls at the Harbor registry
[root@master01 ~]# kubeadm config print init-defaults ClusterConfiguration >kubeadm-config.yaml

# The printed defaults differ somewhat from the file below; merge the two
[root@master01 ~]# cat kubeadm-config.yaml 
apiVersion: kubeadm.k8s.io/v1beta2   # must match the apiVersion printed by kubeadm config print init-defaults ClusterConfiguration
kind: ClusterConfiguration
kubernetesVersion: v1.18.20    # important: must match the version shown by kubeadm config images list (v1.18.20), which differs slightly from the installed 1.18.18 packages; this exact version is what gets pulled from Harbor
imageRepository: harbor.od.com/kubeadm     # pull images from the kubeadm project in Harbor
apiServer:
  certSANs:    # list the hostnames, IPs and the VIP of all kube-apiserver nodes
  - master01
  - master02
  - master03
  - node01
  - node02
  - node03
  - 192.128.232.11
  - 192.128.232.12
  - 192.128.232.13
  - 192.128.232.14
  - 192.128.232.15
  - 192.128.232.16
  - 192.128.232.17
  - 192.128.232.18
  - 192.128.232.19
  - 192.128.232.20
controlPlaneEndpoint: "192.128.232.15:6443"   # VIP address
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
featureGates:
  SupportIPVSProxyMode: true
mode: ipvs  # kube-proxy mode
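
# Optional but useful: pre-pull the images with this config before running init, which also proves that
# the imageRepository setting really resolves to the Harbor registry.
[root@master01 ~]# kubeadm config images list --config kubeadm-config.yaml
[root@master01 ~]# kubeadm config images pull --config kubeadm-config.yaml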

# Initialize the k8s cluster. Also make sure the VIP is up and reachable from the node being initialized.
[root@master01 ~]# kubeadm init --config kubeadm-config.yaml 
To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:

  kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
    --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc \
    --control-plane 

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
    --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc 


# Check the local Docker images: they were indeed all pulled from the private Harbor registry.
[root@master01 ~]# docker images
REPOSITORY                                      TAG        IMAGE ID       CREATED         SIZE
harbor.od.com/kubeadm/kube-proxy                v1.18.20   27f8b8d51985   5 months ago    117MB
harbor.od.com/kubeadm/kube-apiserver            v1.18.20   7d8d2960de69   5 months ago    173MB
harbor.od.com/kubeadm/kube-controller-manager   v1.18.20   e7c545a60706   5 months ago    162MB
harbor.od.com/kubeadm/kube-scheduler            v1.18.20   a05a1a79adaa   5 months ago    96.1MB
harbor.od.com/kubeadm/pause                     3.2        80d28bedfe5d   21 months ago   683kB
harbor.od.com/kubeadm/coredns                   1.6.7      67da37a9a360   22 months ago   43.8MB
harbor.od.com/kubeadm/etcd                      3.4.3-0    303ce5db0e90   2 years ago     288MB

[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config

[root@master01 ~]# kubectl get cs
NAME                 STATUS      MESSAGE                                                                                     ERROR
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused   
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused   
etcd-0               Healthy     {"health":"true"}                                                                           

[root@master01 ~]# vi /etc/kubernetes/manifests/kube-scheduler.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0    # comment out this line
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent

[root@master01 ~]# vi /etc/kubernetes/manifests/kube-controller-manager.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0  # comment out this line
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
 

[root@master01 ~]# systemctl restart kubelet
[root@master01 ~]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
etcd-0               Healthy   {"health":"true"}   
scheduler            Healthy   ok                  
controller-manager   Healthy   ok     

[root@master01 ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master01   Ready    master   58m   v1.18.18

3. Load the environment variables
[root@master01 ~]# echo "export KUBECONFIG=/etc/kubernetes/admin.conf" >> ~/.bash_profile
[root@master01 ~]# source .bash_profile
All operations in this document are run as root; for a non-root user, run the following instead:

mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config

4. Install the flannel network
Create the flannel network on master01
[root@master01 ~]# wget https://raw.githubusercontent.com/coreos/flannel/2140ac876ef134e0ed5af15c65e414cf26827915/Documentation/kube-flannel.yml
[root@master01 ~]# kubectl apply -f kube-flannel.yml
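
# Wait for the flannel pods to come up and the node to turn Ready (filtering by name, since the pod label can differ between manifest versions)
[root@master01 ~]# kubectl -n kube-system get pods -o wide | grep flannel
[root@master01 ~]# kubectl get nodes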

# Check the certificate expiration dates
[root@master01 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration

W1207 10:20:28.086073    7242 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Nov 12, 2111 07:57 UTC   89y                                     no      
apiserver                  Nov 12, 2111 07:57 UTC   89y             ca                      no      
apiserver-etcd-client      Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
apiserver-kubelet-client   Nov 12, 2111 07:57 UTC   89y             ca                      no      
controller-manager.conf    Nov 12, 2111 07:57 UTC   89y                                     no      
etcd-healthcheck-client    Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
etcd-peer                  Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
etcd-server                Nov 12, 2111 07:57 UTC   89y             etcd-ca                 no      
front-proxy-client         Nov 12, 2111 07:57 UTC   89y             front-proxy-ca          no      
scheduler.conf             Nov 12, 2111 07:57 UTC   89y                                     no      

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Nov 12, 2111 07:57 UTC   89y             no      
etcd-ca                 Nov 12, 2111 07:57 UTC   89y             no      
front-proxy-ca          Nov 12, 2111 07:57 UTC   89y             no      

5. Back up the images (export them on the master so they can be loaded onto the worker nodes)
[root@master01 ~]# docker save $(docker images | grep -vE 'REPOSITORY | redis' | awk 'BEGIN{OFS=":";ORS=" "}{print $1,$2}') -o export.tar

# Load the archive on the node(s)
[root@node01 ~]# docker load -i  export.tar

8: Install the master02 node
1. Distribute the certificates (run on master01)
# master01 distributes the certificates to the other two master nodes
[root@master01 ~]# cat >cert-main-master.sh<<'EOF'
USER=root # customizable
CONTROL_PLANE_IPS="192.128.232.12 192.128.232.13"   # distribute to the other two master nodes
for host in ${CONTROL_PLANE_IPS}; do
    ssh $host mkdir -p /etc/kubernetes/pki/etcd
    scp /etc/kubernetes/pki/ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/sa.pub "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.crt "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/front-proxy-ca.key "${USER}"@$host:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/etcd/ca.crt "${USER}"@$host:/etc/kubernetes/pki/etcd/
    # Comment out this line if you are using external etcd
    scp /etc/kubernetes/pki/etcd/ca.key "${USER}"@$host:/etc/kubernetes/pki/etcd/
done
EOF

[root@master01 ~]# chmod +x cert-main-master.sh
[root@master01 ~]# ./cert-main-master.sh 
ca.crt                                                                                  100% 1029   967.6KB/s   00:00    
ca.key                                                                                  100% 1679     1.5MB/s   00:00    
sa.key                                                                                  100% 1675   128.8KB/s   00:00    
sa.pub                                                                                  100%  451   182.4KB/s   00:00    
front-proxy-ca.crt                                                                      100% 1038     1.1MB/s   00:00    
front-proxy-ca.key                                                                      100% 1675    65.3KB/s   00:00    
ca.crt                                                                                  100% 1021    91.0KB/s   00:00    
ca.key                                                                                  100% 1679   718.3KB/s   00:00    
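
# A quick check on master02 that the certificates landed where kubeadm join expects them
[root@master02 ~]# ls /etc/kubernetes/pki/ /etc/kubernetes/pki/etcd/
# expected: ca.crt, ca.key, sa.key, sa.pub, front-proxy-ca.crt, front-proxy-ca.key, plus etcd/ca.crt and etcd/ca.key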


2. Log in to Harbor (run on master02)
[root@master02 ~]# docker login harbor.od.com
Username: admin
Password: 
WARNING! Your password will be stored unencrypted in /root/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded

# Copy the rebuilt kubeadm to the other cluster nodes
[root@master02 ~]# scp master01:/usr/bin/kubeadm /usr/bin/kubeadm 

# Join master02 to the cluster as a control-plane node
[root@master02 ~]# kubeadm join 192.128.232.15:6443 --token pqpfkz.0gdupmp5uk22ym6c \
    --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc \
    --control-plane
......
This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.

To start administering your cluster from this node, you need to run the following as a regular user:

        mkdir -p $HOME/.kube
        sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
        sudo chown $(id -u):$(id -g) $HOME/.kube/config

Run 'kubectl get nodes' to see this node join the cluster.

# The node joined the control plane successfully; its images were pulled from the private Harbor registry.

[root@master02 ~]# mkdir -p $HOME/.kube
[root@master02 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config

[root@master02 ~]# docker images|grep harbor.od.com
harbor.od.com/kubeadm/kube-proxy                v1.18.20        27f8b8d51985   5 months ago    117MB
harbor.od.com/kubeadm/kube-apiserver            v1.18.20        7d8d2960de69   5 months ago    173MB
harbor.od.com/kubeadm/kube-scheduler            v1.18.20        a05a1a79adaa   5 months ago    96.1MB
harbor.od.com/kubeadm/kube-controller-manager   v1.18.20        e7c545a60706   5 months ago    162MB
harbor.od.com/kubeadm/pause                     3.2             80d28bedfe5d   22 months ago   683kB
harbor.od.com/kubeadm/coredns                   1.6.7           67da37a9a360   22 months ago   43.8MB
harbor.od.com/kubeadm/etcd                      3.4.3-0         303ce5db0e90   2 years ago     288MB

3. Change the apiserver upstream on both nginx hosts to forward only to the master02 node.
[root@nginx01 kubernetes]# vi /etc/nginx/nginx.conf
stream {
    upstream kube-apiserver {
       # server 192.128.232.11:6443     max_fails=3 fail_timeout=30s;
        server 192.128.232.12:6443     max_fails=3 fail_timeout=30s;
       # server 192.128.232.13:6443     max_fails=3 fail_timeout=30s;
    }
    server {
        listen 6443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}

[root@nginx01 kubernetes]# systemctl restart nginx

4. On master02
[root@master02 ~]# kubectl get cs
NAME                 STATUS      MESSAGE                                                                                     ERROR
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused   
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused   
etcd-0               Healthy     {"health":"true"}   

[root@master02 ~]# vi /etc/kubernetes/manifests/kube-scheduler.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
#    - --port=0    # comment out this line
    image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
    imagePullPolicy: IfNotPresent
   

[root@master02 ~]# vi /etc/kubernetes/manifests/kube-controller-manager.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-controller-manager
    tier: control-plane
  name: kube-controller-manager
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-controller-manager
    - --allocate-node-cidrs=true
    - --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
    - --bind-address=127.0.0.1
    - --client-ca-file=/etc/kubernetes/pki/ca.crt
    - --cluster-cidr=10.244.0.0/16
    - --cluster-name=kubernetes
    - --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
    - --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
    - --controllers=*,bootstrapsigner,tokencleaner
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true
    - --node-cidr-mask-size=24
#    - --port=0  # comment out this line
    - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
    - --root-ca-file=/etc/kubernetes/pki/ca.crt
    - --service-account-private-key-file=/etc/kubernetes/pki/sa.key
    - --service-cluster-ip-range=10.96.0.0/12
    - --use-service-account-credentials=true
    image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
    imagePullPolicy: IfNotPresent
    

[root@master02 ~]# systemctl restart kubelet
[root@master02 ~]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
etcd-0               Healthy   {"health":"true"}   
scheduler            Healthy   ok                  
controller-manager   Healthy   ok     

[root@master02 ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE     VERSION
master01   Ready    master   141m    v1.18.18
master02   Ready    master   8m57s   v1.18.18

[root@master02 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration

W1207 10:31:58.191758   18746 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Nov 12, 2111 10:11 UTC   89y                                     no      
apiserver                  Nov 12, 2111 10:11 UTC   89y             ca                      no      
apiserver-etcd-client      Nov 12, 2111 10:11 UTC   89y             etcd-ca                 no      
apiserver-kubelet-client   Nov 12, 2111 10:11 UTC   89y             ca                      no      
controller-manager.conf    Nov 12, 2111 10:11 UTC   89y                                     no      
etcd-healthcheck-client    Nov 12, 2111 10:11 UTC   89y             etcd-ca                 no      
etcd-peer                  Nov 12, 2111 10:11 UTC   89y             etcd-ca                 no      
etcd-server                Nov 12, 2111 10:11 UTC   89y             etcd-ca                 no      
front-proxy-client         Nov 12, 2111 10:11 UTC   89y             front-proxy-ca          no      
scheduler.conf             Nov 12, 2111 10:11 UTC   89y                                     no      

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Nov 12, 2111 07:57 UTC   89y             no      
etcd-ca                 Nov 12, 2111 07:57 UTC   89y             no      
front-proxy-ca          Nov 12, 2111 07:57 UTC   89y             no      

# Repeat the same steps as for master02 to join master03 to the k8s cluster

[root@master02 ~]# kubectl get nodes
NAME       STATUS     ROLES    AGE   VERSION
master01   Ready      master   35m   v1.18.18
master02   Ready      master   28m   v1.18.18
master03   Ready      master   23m   v1.18.18

Conclusion: that is the complete procedure for the highly available master deployment.

9: Failure drill
1. Shut down the master01 node and check whether the cluster keeps working

[root@master02 ~]# kubectl get cs
NAME                 STATUS    MESSAGE             
scheduler            Healthy   ok                  
controller-manager   Healthy   ok                  
etcd-0               Healthy   {"health":"true"}   

# Check the master nodes: master01 is now in trouble
[root@devops ~]# kubectl get nodes
NAME       STATUS     ROLES    AGE   VERSION
master01   NotReady   master   35m   v1.18.18
master02   Ready      master   28m   v1.18.18
master03   Ready      master   23m   v1.18.18

[root@devops ~]# kubectl get pods -n kube-system
NAME                               READY   STATUS             RESTARTS   AGE
coredns-55b448c5dd-48hf4           1/1     Running            0          17m
coredns-55b448c5dd-8bs9b           1/1     Running            0          17m
etcd-master01                      1/1     Running            0          17m
etcd-master02                      1/1     Running            0          10m
etcd-master03                      1/1     Running            0          6m
kube-apiserver-master01            1/1     Running            0          17m
kube-apiserver-master02            1/1     Running            0          10m
kube-apiserver-master03            1/1     Running            0          6m1s
kube-controller-manager-master01   0/1     CrashLoopBackOff   1          15m     
kube-controller-manager-master02   1/1     Running            1          7m22s
kube-controller-manager-master03   1/1     Running            0          3m26s
kube-flannel-ds-amd64-75z97        1/1     Running            0          6m9s
kube-flannel-ds-amd64-7q6pz        1/1     Running            1          10m
kube-flannel-ds-amd64-fg8zb        1/1     Running            0          13m
kube-proxy-9l66g                   1/1     Running            0          10m
kube-proxy-sjx5q                   1/1     Running            0          6m9s
kube-proxy-wc7gb                   1/1     Running            0          17m
kube-scheduler-master01            1/1     Running            2          15m
kube-scheduler-master02            1/1     Running            1          7m22s
kube-scheduler-master03            1/1     Running            0          3m26s

# Delete the failed master node first, then investigate master01's problem and rejoin it to the k8s cluster
[root@devops ~]# kubectl delete node master01
node "master01" deleted
[root@devops ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master02   Ready    master   34m   v1.18.18
master03   Ready    master   29m   v1.18.18

# The containers on master01 are now Terminating:
[root@master01 ~]# kubectl get pods -n kube-system -o wide

NAME                               READY   STATUS    RESTARTS   AGE   IP               NODE       NOMINATED NODE   READINESS GATES
coredns-55b448c5dd-7t8zb           1/1     Running   0          37m   10.244.1.2       devops   <none>           <none>
coredns-55b448c5dd-nwmgq           1/1     Running   0          37m   10.244.2.2       master03   <none>           <none>
etcd-master01                      1/1     Terminating   1          13m   192.128.232.11   master01   <none>           <none>
etcd-master02                      1/1     Running   0          52m   192.128.232.12   devops   <none>           <none>
etcd-master03                      1/1     Running   0          47m   192.128.232.13   master03   <none>           <none>
kube-apiserver-master01            1/1     Terminating   1          13m   192.128.232.11   master01   <none>           <none>
kube-apiserver-master02            1/1     Running   0          52m   192.128.232.12   devops   <none>           <none>
kube-apiserver-master03            1/1     Running   0          47m   192.128.232.13   master03   <none>           <none>
kube-controller-manager-master01   1/1     Terminating   4          13m   192.128.232.11   master01   <none>           <none>
kube-controller-manager-master02   1/1     Running   2          49m   192.128.232.12   devops   <none>           <none>
kube-controller-manager-master03   1/1     Running   0          45m   192.128.232.13   master03   <none>           <none>
kube-flannel-ds-amd64-75z97        1/1     Running   0          47m   192.128.232.13   master03   <none>           <none>
kube-flannel-ds-amd64-7q6pz        1/1     Running   1          52m   192.128.232.12   devops   <none>           <none>
kube-flannel-ds-amd64-gtcpb        1/1     Terminating   2          13m   192.128.232.11   master01   <none>           <none>
kube-proxy-9l66g                   1/1     Running   0          52m   192.128.232.12   devops   <none>           <none>
kube-proxy-qgbr8                   1/1     Terminating   0          13m   192.128.232.11   master01   <none>           <none>
kube-proxy-sjx5q                   1/1     Running   0          47m   192.128.232.13   master03   <none>           <none>
kube-scheduler-master01            1/1     Terminating   4          13m   192.128.232.11   master01   <none>           <none>
kube-scheduler-master02            1/1     Running   3          49m   192.128.232.12   devops   <none>           <none>
kube-scheduler-master03            1/1     Running   0          45m   192.128.232.13   master03   <none>           <none>

# The k8s cluster now only has the devops (master02, 192.128.232.12) and master03 nodes
[root@devops ~]# kubectl get pods -n kube-system -o wide
NAME                               READY   STATUS    RESTARTS   AGE   IP               NODE       NOMINATED NODE   READINESS GATES
coredns-55b448c5dd-7t8zb           1/1     Running   0          20m   10.244.1.2       devops   <none>           <none>
coredns-55b448c5dd-nwmgq           1/1     Running   0          20m   10.244.2.2       master03   <none>           <none>
etcd-devops                      1/1     Running   0          35m   192.128.232.12   devops   <none>           <none>
etcd-master03                      1/1     Running   0          30m   192.128.232.13   master03   <none>           <none>
kube-apiserver-devops            1/1     Running   0          35m   192.128.232.12   devops   <none>           <none>
kube-apiserver-master03            1/1     Running   0          30m   192.128.232.13   master03   <none>           <none>
kube-controller-manager-devops   1/1     Running   1          32m   192.128.232.12   devops   <none>           <none>
kube-controller-manager-master03   1/1     Running   0          28m   192.128.232.13   master03   <none>           <none>
kube-flannel-ds-amd64-75z97        1/1     Running   0          30m   192.128.232.13   master03   <none>           <none>
kube-flannel-ds-amd64-7q6pz        1/1     Running   1          35m   192.128.232.12   devops   <none>           <none>
kube-proxy-9l66g                   1/1     Running   0          35m   192.128.232.12   devops   <none>           <none>
kube-proxy-sjx5q                   1/1     Running   0          30m   192.128.232.13   master03   <none>           <none>
kube-scheduler-devops            1/1     Running   1          32m   192.128.232.12   devops   <none>           <none>
kube-scheduler-master03            1/1     Running   0          28m   192.128.232.13   master03   <none>           <none>

# Repair or re-add a master immediately: if one more master fails, the cluster goes down, because the etcd cluster loses quorum and its data becomes unavailable.

10: Install the worker nodes

## If the join token is forgotten or has expired, recover it as follows:
# Case 1: the token has expired
1. Get a token

# List the tokens that have not expired yet
[root@master01 ~]# kubeadm token list  | awk -F" " '{print $1}' |tail -n 1

# If nothing is printed (all tokens expired), create a new one; this also prints the full join command
[root@master01 ~]# kubeadm token create --print-join-command  

# Case 2: the token has not expired
1. List the tokens
[root@master01 ~]# kubeadm token list  | awk -F" " '{print $1}' |tail -n 1
jd0u21.4ydhozszk7255xxb

2. Get the SHA-256 hash of the CA public key
[root@master01 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc

3. Join the worker node to the cluster
[root@master01 ~]# kubeadm join 192.128.232.15:6443 --token jd0u21.4ydhozszk7255xxb  --discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc

4. Add node01 to the k8s cluster
[root@node01 ~]# kubeadm join 192.128.232.15:6443 --token ml3xl4.lxb0gclu3uhbpxoy --discovery-token-ca-cert-hash sha256:10d33f646d7f9b35502133045e67b30aeddbd456b7f7fecbb5922751c34828f2
W1208 11:18:51.965242    1664 join.go:346] [preflight] WARNING: JoinControlPane.controlPlane settings will be ignored when control-plane flag is not set.
[preflight] Running pre-flight checks
        [WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.6. Latest validated version: 19.03
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

5. Confirm node01 joined the k8s cluster (master01 is still removed from the cluster, as above)
[root@master02 ~]# kubectl get nodes
NAME       STATUS   ROLES    AGE   VERSION
master02   Ready    master   18h   v1.18.18
master03   Ready    master   18h   v1.18.18
node01     Ready    <none>   15s   v1.18.18

6. Check whether the two masters carry the taint that keeps pods from being scheduled onto them (taints are covered in detail later).
Both masters below are tainted; if a node had no taint, its Taints field would be empty.
[root@master02 ~]# kubectl describe node devops|grep Taints
Taints:             node-role.kubernetes.io/master:NoSchedule

[root@master02 ~]# kubectl describe node master03|grep Taints
Taints:             node-role.kubernetes.io/master:NoSchedule
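
# For reference, the taint can be removed if you ever want workloads on a control-plane node (e.g. in a small lab),
# and added back later; a sketch using master03 as the example node:
[root@master02 ~]# kubectl taint nodes master03 node-role.kubernetes.io/master:NoSchedule-    # the trailing "-" removes the taint
[root@master02 ~]# kubectl taint nodes master03 node-role.kubernetes.io/master=:NoSchedule    # put the taint back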

7. Deploy a simple Deployment to the k8s cluster
[root@master02 ~]# cat >nginx-dp.yaml<<EOF 
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  selector:
    matchLabels:
      app: nginx
  replicas: 2           # tells deployment to run 2 pods matching the template
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.16
        ports:
        - containerPort: 80
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
      volumes:
       - name: localtime
         hostPath:
           path: /usr/share/zoneinfo/Asia/Shanghai
EOF

8. The pods are scheduled only onto node01; nothing is scheduled onto the master nodes.
[root@master02 ~]# kubectl get pods -o wide
NAME                                READY   STATUS    RESTARTS   AGE   IP           NODE     NOMINATED NODE   READINESS GATES
nginx-deployment-767cbb69b8-jss4k   1/1     Running   0          12m   10.244.0.2   node01   <none>           <none>
nginx-deployment-767cbb69b8-t79kx   1/1     Running   0          12m   10.244.0.3   node01   <none>           <none>
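
# Optionally, as an end-to-end check, the Deployment can be exposed as a ClusterIP Service and curled from a cluster node
# (the Service name nginx-svc below is just an example):
[root@master02 ~]# kubectl expose deployment nginx-deployment --name=nginx-svc --port=80 --target-port=80
[root@master02 ~]# kubectl get svc nginx-svc
# curl the CLUSTER-IP shown above from any node; it should return the nginx welcome page
[root@node01 ~]# curl -s http://<CLUSTER-IP> | head -n 4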

11: The role of etcd
etcd is a critical component of a Kubernetes cluster: it stores all of the cluster's network configuration and the state of every object.

1. Common etcdctl operations
 a. Locate the etcd container (etcdctl will be copied out of it onto the host)
[root@master01 manifests]# docker ps -a|grep k8s_etcd
6f67d5325e6e  303ce5db0e90  "etcd --advertise-cl…" 9 minutes ago Up 9 minutes k8s_etcd_etcd-master01_kube-system_dbdfd4ec66544be175f95fdae2031419_222

b. Copy etcdctl out of the etcd container onto the host
[root@master01 manifests]# docker cp k8s_etcd_etcd-master01_kube-system_dbdfd4ec66544be175f95fdae2031419_222:/usr/local/bin/etcdctl /usr/local/bin/
 
c. Check that the command works
[root@master01 manifests]# etcdctl version
etcdctl version: 3.4.3
API version: 3.4
 
d. List the etcd cluster members
[root@master01 manifests]# etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt \
 --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key member list -w table
+------------------+---------+----------+-----------------------------+-----------------------------+------------+
|        ID        | STATUS  |   NAME   |         PEER ADDRS          |        CLIENT ADDRS         | IS LEARNER |
+------------------+---------+----------+-----------------------------+-----------------------------+------------+
|  326ec117eddf797 | started | master01 | https://192.128.232.11:2380 | https://192.128.232.11:2379 |      false |
| 27a323610abdf106 | started | master02 | https://192.128.232.12:2380 | https://192.128.232.12:2379 |      false |
| 945a20b0e323f57f | started | master03 | https://192.128.232.13:2380 | https://192.128.232.13:2379 |      false |
+------------------+---------+----------+-----------------------------+-----------------------------+------------+

e. Check the endpoint status
[root@master01 manifests]# etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt  --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt --key=/etc/kubernetes/pki/etcd/healthcheck-client.key endpoint status -w table
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|         ENDPOINT         |       ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://[127.0.0.1]:2379 | 326ec117eddf797 |   3.4.3 |  3.2 MB |      true |      false |     13650 |      40194 |              40194 |        |
+--------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
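
f. Back up etcd. Given the warning in the failure drill above, it is worth taking periodic snapshots so the cluster state can be restored even if quorum is lost. A minimal sketch with etcdctl v3, reusing the same client certificates (the backup path is arbitrary):
[root@master01 manifests]# mkdir -p /data/backup
[root@master01 manifests]# ETCDCTL_API=3 etcdctl --endpoints=https://127.0.0.1:2379 \
   --cacert=/etc/kubernetes/pki/etcd/ca.crt \
   --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
   --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
   snapshot save /data/backup/etcd-snapshot-$(date +%F).db
# inspect the snapshot
[root@master01 manifests]# ETCDCTL_API=3 etcdctl snapshot status /data/backup/etcd-snapshot-$(date +%F).db -w table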
 

                      
