k8s集群部署参考-Rocky
Rocky9.4 部署K8s
1 系统安装
1 镜像下载
https://mirrors.aliyun.com/rockylinux/
2 系统安装略(和安装centos基本一致)
3 IP配置
[root@rocky-01 ~]# vi /etc/NetworkManager/system-connections/ens160.nmconnection
[connection]
id=ens160
uuid=9065b76e-d3d9-3855-beb1-c6aa8fb63ebc
type=ethernet
autoconnect-priority=-999
interface-name=ens160
timestamp=1720967833
[ethernet]
[ipv4]
method=manual # auto改为manual
address1=11.0.1.140/24,11.0.1.2 # IP、子网掩码、网关
dns=11.0.1.2 # dns
[ipv6]
addr-gen-mode=eui64
method=auto
[proxy]
[root@rocky-01 ~]# nmcli connection reload
[root@rocky-01 ~]# nmcli connection up ens160
[root@rocky-01 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens160: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:71:7a:07 brd ff:ff:ff:ff:ff:ff
altname enp3s0
inet 11.0.1.140/24 brd 11.0.1.255 scope global noprefixroute ens160
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe71:7a07/64 scope link noprefixroute
valid_lft forever preferred_lft forever
# 另外一个办法,使用nmcli命令直接添加
nmcli con modify 'ens160' ifname ens160 ipv4.method manual ipv4.addresses 10.10.10.10/24 gw4 10.10.10.254
nmcli con modify 'ens160' ipv4.dns 10.10.10.10
nmcli con down 'ens160'
nmcli con up 'ens160'
4 yum源更换
################################# 配置本地镜像源 #################################
【1】 创建目录
# 创建系统镜像文件存放目录
[root@rocky-01 ~]# mkdir /mnt/iso
# 创建系统镜像挂载目录
[root@rocky-01 ~]# mkdir /mnt/cdrom
# 上传下载好的系统镜像文件Rocky-9.4-x86_64-dvd.iso到/mnt/iso目录下
[root@rocky-01 ~]# ll /mnt/iso/
总用量 10660544
-rw-r--r--. 1 root root 10916397056 7月 14 15:20 Rocky-9.4-x86_64-dvd.iso
【2】挂载系统镜像文件
# 将系统镜像文件挂载到/mnt/cdrom目录
[root@rocky-01 ~]# mount -o loop /mnt/iso/Rocky-9.4-x86_64-dvd.iso /mnt/cdrom
#提示:mount: /mnt/cdrom: WARNING: device write-protected, mounted read-only.
# 使用df -TH查看,会出现类似下面的挂载目录,说明挂载成功
[root@rocky-01 ~]# df -TH |grep /mnt/cdrom
/dev/loop0 iso9660 11G 11G 0 100% /mnt/cdrom
【3】配置本地yum源
# 创建本地仓库
[root@rocky-01 ~]# mkdir /etc/yum.repos.d/bak
[root@rocky-01 ~]# mv /etc/yum.repos.d/rocky* /etc/yum.repos.d/bak/
# 使用 cp 命令将 media.repo 文件复制到 /etc/yum.repos.d 目录下
[root@rocky-01 ~]# cp /mnt/cdrom/media.repo /etc/yum.repos.d/
# 更改 media.repo 文件
[root@rocky-01 ~]# vi /etc/yum.repos.d/media.repo
[InstallMedia-BaseOS]
name=Rocky Linux 9.0 - BaseOS
mediaid=1714885940.021839
metadata_expire=-1
gpgcheck=0
cost=500
baseurl=file:///mnt/cdrom/BaseOS
[InstallMedia-AppStream]
name=Rocky Linux 9.0 - AppStream
mediaid=1714885940.021839
metadata_expire=-1
gpgcheck=0
cost=500
baseurl=file:///mnt/cdrom/AppStream
# 保存退出,然后清空cache、重建cache
yum clean all #清空cache
yum makecache #重建cache
【4】设置开机自动挂载
#查看
[root@rocky-01 yum.repos.d]# mount -l | grep cdrom
/mnt/iso/Rocky-9.4-x86_64-dvd.iso on /mnt/cdrom type iso9660 (ro,relatime,nojoliet,check=s,map=n,blocksize=2048) [Rocky-9-4-x86_64-dvd]
#编辑/etc/fstab文件,添加以下代码。实现开机自动挂载
[root@rocky-01 yum.repos.d]# vi /etc/fstab
/mnt/iso/Rocky-9.4-x86_64-dvd.iso /mnt/cdrom iso9660 defaults,ro,loop 0 0
:wq! #保存退出
#系统重启后,使用df -h查看系统挂载目录
5 安装必要软件和工具
yum -y install wget tree curl bash-completion jq vim net-tools telnet lrzsz
2 OS系统初始化(所有节点操作)
1 关闭或者放行指定端口防火墙(这里采用关闭方式)
systemctl disable --now firewalld.service #关闭并禁止开机自启
Removed "/etc/systemd/system/multi-user.target.wants/firewalld.service".
Removed "/etc/systemd/system/dbus-org.fedoraproject.FirewallD1.service".
systemctl status firewalld # 查看防火墙状态
○ firewalld.service - firewalld - dynamic firewall daemon
Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; preset: enabled)
Active: inactive (dead)
Docs: man:firewalld(1)
2 关闭SELinux
# 永久关闭
sed -ri 's#(SELINUX=)enforcing#\1disabled#g' /etc/selinux/config
# 临时关闭
setenforce 0
getenforce #查看状态
Permissive
3 配置hosts解析
重要:k8sapi 为必加域名,作为 kube-apiserver 的统一接入入口(对应后文的 controlPlaneEndpoint);集群高可用模式下一般解析到浮动IP(VIP)。如不添加该条目,集群初始化会失败
cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
11.0.1.140 k8s-master01
11.0.1.141 k8s-node01
11.0.1.142 k8s-node02
11.0.1.150 k8sapi #VIP
4 此处在master节点进行配置ssh免密(可选)
# 匹配出所有k8s集群IP
[root@rocky-01 ~]# sed -nE '/k8s-/s#^([^ ]+) .*$#\1#p' /etc/hosts
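A minimal sketch of the actual key distribution for this optional passwordless-SSH step (the host list reuses the sed expression above; it assumes root passwords are typed interactively once per host):
# Generate a key pair on master01 if one does not already exist
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
# Push the public key to every cluster host listed in /etc/hosts
for ip in $(sed -nE '/k8s-/s#^([^ ]+) .*$#\1#p' /etc/hosts); do
  ssh-copy-id -o StrictHostKeyChecking=no root@$ip
done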
5 配置时间同步
# 我这里直接配置阿里云的
[root@rocky-01 ~]# vi /etc/chrony.conf    # 编辑配置文件,加入以下内容
pool ntp1.aliyun.com iburst
pool ntp2.aliyun.com iburst
pool cn.pool.ntp.org iburst
[root@rocky-01 ~]# systemctl enable --now chronyd #配置开机自启
[root@rocky-01 ~]# chronyc sources #测试
# 内网环境
# 如果有ntp服务器,直接将地址配置在 /etc/chrony.conf 文件里面
server x.x.x.x iburst
# 重启chronyd服务,并设置开机启动
systemctl restart chronyd && systemctl enable chronyd
# 如果没有ntp服务器,那么就自己选择一台服务器充当ntp服务器进行配置
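A hedged sketch of making one host act as that internal NTP server with chrony (the 11.0.1.0/24 subnet is this document's example network; adjust to your own):
# On the host chosen as the NTP server: allow the cluster subnet and keep serving the local clock even without upstream sources
cat >> /etc/chrony.conf << EOF
allow 11.0.1.0/24
local stratum 10
EOF
systemctl restart chronyd
# On all other nodes, point chrony at it: server <ntp-server-ip> iburst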
6 禁用swap分区
swapoff -a #临时禁用
sed -i 's/.*swap.*/#&/' /etc/fstab #永久禁用
# 检测
[root@rocky-01 ~]# free -m
total used free shared buff/cache available
Mem: 1935 423 1482 5 181 1511
Swap: 0 0 0
7 修改内核参数
cat > /etc/sysctl.d/k8s.conf << EOF
#内核参数调整
#配置iptables参数,使得流经网桥的流量也经过iptables/netfilter防火墙
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
#启用IP 转发功能
net.ipv4.ip_forward = 1
#允许文件系统分离挂载
fs.may_detach_mounts = 1
#减少交换分区的使用倾向。
vm.swappiness = 0
#允许内存超配
vm.overcommit_memory=1
#在内存不足时不触发系统崩溃
vm.panic_on_oom=0
#设置进程的最大内存映射区域数量
vm.max_map_count=655360
#增加 inotify 每个用户可使用的监视(watch)数量上限
fs.inotify.max_user_watches=89100
#增加文件描述符的最大数量
fs.file-max=52706963
#单个进程可打开文件句柄数的上限
fs.nr_open=52706963
#设置网络连接跟踪的最大数量
net.netfilter.nf_conntrack_max=2310720
#TCP 连接在空闲状态下保持存活的时间(秒),超过这个时间后开始发送保活探测包。
net.ipv4.tcp_keepalive_time = 600
#发送保活探测包的次数,如果都没有响应则关闭连接。
net.ipv4.tcp_keepalive_probes = 3
#发送保活探测包的时间间隔(秒)
net.ipv4.tcp_keepalive_intvl = 15
#设置系统中处于 TIME_WAIT 状态的最大连接数量。
net.ipv4.tcp_max_tw_buckets = 36000
#允许将处于 TIME_WAIT 状态的套接字重新用于新的连接。
net.ipv4.tcp_tw_reuse = 1
#系统所能处理的孤儿套接字(没有父进程的套接字)的最大数量。
net.ipv4.tcp_max_orphans = 327680
#孤儿套接字的重试次数。
net.ipv4.tcp_orphan_retries = 3
#启用 SYN Cookies 功能,防范 SYN 洪水攻击。
net.ipv4.tcp_syncookies = 1
#SYN 队列(未完成连接队列)的最大长度。
net.ipv4.tcp_max_syn_backlog = 16384
#禁用 TCP 时间戳选项。
net.ipv4.tcp_timestamps = 0
#套接字监听队列的最大长度。
net.core.somaxconn = 16384
EOF
#可以删除注释
sed -i '/^#/d' /etc/sysctl.d/k8s.conf
#加载网桥过滤模块
cat <<EOF | tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
#重新加载
sysctl --system    # 或者:sysctl -p /etc/sysctl.d/k8s.conf
#检测
sysctl -a
vm.swappiness = 0
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
#返回如下表示加载成功
lsmod | grep br_netfilter
br_netfilter 32768 0
bridge 303104 1 br_netfilter
8 配置ipvs功能
yum install ipset ipvsadm -y #安装ipset和ipvsadm
# 添加需要加载的模块写入脚本文件
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
overlay
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
modprobe overlay
modprobe ip_vs && modprobe ip_vs_rr && modprobe ip_vs_wrr && modprobe ip_vs_sh && modprobe nf_conntrack
#查看模块是否加载成功
lsmod | grep -e ip_vs -e nf_conntrack
ip_vs_sh 16384 0
ip_vs_wrr 16384 0
ip_vs_rr 16384 0
ip_vs 188416 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack 176128 3 nf_nat,nft_ct,ip_vs
nf_defrag_ipv6 24576 2 nf_conntrack,ip_vs
libcrc32c 16384 5 nf_conntrack,nf_nat,nf_tables,xfs,ip_vs
9 修改 limits 参数
cat <<EOF >>/etc/security/limits.conf
* soft nofile 65535
* hard nofile 65535
* soft nproc 65535
* hard nproc 65535
* soft memlock unlimited
* hard memlock unlimited
EOF
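A quick hedged check after re-login (note: the "*" wildcard in limits.conf does not apply to the root account, so verify with a normal user or add explicit "root" entries if needed):
ulimit -n   # expect 65535 (nofile)
ulimit -u   # expect 65535 (nproc)
ulimit -l   # expect unlimited (memlock)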
3 安装docker(所有节点操作)
1 下载地址
https://download.docker.com/linux/static/stable/x86_64/docker-20.10.10.tgz
2 安装docker
#脚本目录结构
[root@rocky-01 weihu]# pwd
/home/weihu
[root@rocky-01 weihu]# ls
script src
# 脚本
[root@rocky-01 weihu]# cat /home/weihu/script/install_docker.sh
#!/bin/bash
MY_PATH=$(cd $(dirname $0);pwd)
DOCKER_TAG='docker-20.10.10.tgz'
function Install_docker(){
#安装docker
read -p "请输入docker安装目录,默认/home/docker,如使用默认路径可直接回车,自定义请按默认路径格式输入: " dockerpath
if [ -z "$dockerpath" ];then
dockerpath=/home/docker
else
mkdir -p $dockerpath
fi
dockerstatus=$(systemctl status docker 2> /dev/null |grep Active |grep running |wc -l)
if [ $dockerstatus -eq 1 ];then
systemctl stop docker
fi
cd $MY_PATH/../src && \
tar -xf $DOCKER_TAG && \
chmod 755 docker/* && \
cp docker/* /usr/bin/ && \
groupadd docker
#增加docker服务
cat > /usr/lib/systemd/system/docker.service <<"EOF"
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/bin/dockerd -H unix:///var/run/docker.sock
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
EOF
if [ -d "/etc/docker/" ];then rm -rf /etc/docker/;fi && \
mkdir -p /etc/docker/ && \
cat > /etc/docker/daemon.json << EOF
{
"experimental": true,
"ip6tables": true,
"exec-opts": ["native.cgroupdriver=systemd"],
"data-root": "$dockerpath"
}
EOF
systemctl daemon-reload
systemctl enable --now docker > /dev/null 2>&1
sleep 5
dockerstatus=$(systemctl status docker 2> /dev/null |grep Active |grep running |wc -l)
if [ $dockerstatus -eq 1 ];then
echo "docker启动成功"
else
echo "docker启动失败"
fi
}
Install_docker
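A hedged usage note: with docker-20.10.10.tgz placed in /home/weihu/src/ as shown above, the script would be run and verified roughly like this:
bash /home/weihu/script/install_docker.sh
# Confirm the cgroup driver matches what daemon.json sets (systemd, which kubelet expects later) and that data-root took effect
docker info | grep -i "cgroup driver"
docker info | grep -i "docker root dir"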
4 cri-dockerd环境操作(所有节点操作)
如果你使用Docker作为K8S容器运行时的话,kubelet需要先要通过 dockershim 去调用Docker,再通过Docker去调用containerd。
如果你使用containerd作为K8S容器运行时的话,由于containerd内置了 CRI (Container Runtime Interface:容器运行时接口)插件,kubelet可以直接调用containerd。
配置cri-docker使kubernetes以docker作为运行时。
自 1.24 版起,Dockershim 已从 Kubernetes 项目中移除。因为历史问题docker却不支持kubernetes主推的CRI(容器运行时接口)标准,所以docker不能再作为kubernetes的容器运行时了,即从kubernetesv1.24开始不再使用docker了。
但是如果想继续使用docker的话,可以在kubelet和docker之间加上一个中间层cri-docker。cri-docker是一个支持CRI标准的shim(垫片)。一头通过CRI跟kubelet交互,另一头跟docker api交互,从而间接的实现了kubernetes以docker作为容器运行时。
1 下载地址
https://github.com/Mirantis/cri-dockerd/releases
2 解压到指定目录并且查看
[root@rocky-01 src]# ls cri-dockerd-0.3.9.amd64.tgz
cri-dockerd-0.3.9.amd64.tgz
[root@rocky-01 src]# tar -xf cri-dockerd-0.3.9.amd64.tgz -C /usr/local/
[root@rocky-01 src]# mv /usr/local/cri-dockerd/cri-dockerd /usr/local/bin/
[root@rocky-01 src]# cri-dockerd --version
cri-dockerd 0.3.9 (c50b98d)
# 或者用下面这个直接解压到/usr/local/bin/下面
tar -xvf cri-dockerd-0.3.9.amd64.tgz --strip-components=1 -C /usr/local/bin/ #解压缩到指定目录
3 定制配置文件
仅支持CRI规范的kubelet需要经由遵循该规范的cri-dockerd完成与docker-ce的整合
cat > /etc/systemd/system/cri-dockerd.service <<-"EOF"
[Unit]
Description=CRI Interface for Docker Application Container Engine
Documentation=https://docs.mirantis.com
After=network-online.target firewalld.service docker.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/local/bin/cri-dockerd --pod-infra-container-image=registry.aliyuncs.com/google_containers/pause:3.7 --network-plugin=cni --cni-conf-dir=/etc/cni/net.d --cni-bin-dir=/opt/cni/bin --container-runtime-endpoint=unix:///var/run/cri-dockerd.sock --cri-dockerd-root-directory=/var/lib/dockershim --docker-endpoint=unix:///var/run/docker.sock
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutSec=0
RestartSec=2
Restart=always
StartLimitBurst=3
StartLimitInterval=60s
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity
Delegate=yes
KillMode=process
[Install]
WantedBy=multi-user.target
EOF
cat > /etc/systemd/system/cri-dockerd.socket <<-EOF
[Unit]
Description=CRI Docker Socket for the API
PartOf=cri-dockerd.service
[Socket]
ListenStream=/var/run/cri-dockerd.sock
SocketMode=0660
SocketUser=root
SocketGroup=docker
[Install]
WantedBy=sockets.target
EOF
4 启动cri-dockerd并设置为开机自启
systemctl daemon-reload
systemctl enable --now cri-dockerd.service #启动并设置开机自启
systemctl status cri-dockerd.service #检查状态显示如下
● cri-dockerd.service - CRI Interface for Docker Application Container Engine
Loaded: loaded (/etc/systemd/system/cri-dockerd.service; enabled; preset: disabled)
Active: active (running) since Tue 2024-01-09 15:51:04 CST; 10s ago
...
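A small hedged sanity check that the shim is listening on the socket used by the kubelet/kubeadm steps below:
ls -l /var/run/cri-dockerd.sock           # the socket file should exist
systemctl is-active cri-dockerd.service   # should print "active"
# Once crictl (cri-tools) is installed in section 5.2, the endpoint can also be queried directly:
# crictl --runtime-endpoint unix:///var/run/cri-dockerd.sock info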
5 集群安装部署
5.1 找台能联网的机器用来打离线包(集群节点之外)
集群之外随便一台机器操作
[root@rocky-02 ~]# ping www.baidu.com -c 3
PING www.a.shifen.com (153.3.238.102) 56(84) 比特的数据。
64 比特,来自 153.3.238.102 (153.3.238.102): icmp_seq=1 ttl=128 时间=13.3 毫秒
64 比特,来自 153.3.238.102 (153.3.238.102): icmp_seq=2 ttl=128 时间=13.2 毫秒
64 比特,来自 153.3.238.102 (153.3.238.102): icmp_seq=3 ttl=128 时间=15.2 毫秒
--- www.a.shifen.com ping 统计 ---
已发送 3 个包, 已接收 3 个包, 0% packet loss, time 3043ms
rtt min/avg/max/mdev = 13.151/13.900/15.246/0.953 ms
配置国内kubernetes源
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum makecache # 生成缓存
yum --showduplicates list kubelet #可以列出kubelet软件包的所有可用版本
yum --showduplicates list kubectl #可以列出kubectl软件包的所有可用版本
yum --showduplicates list kubeadm #可以列出kubeadm软件包的所有可用版本
制作离线包
[root@rocky-02 ~]# yum -y install yum-utils # 安装工具(yumdownloader)
# 下载 K8s 软件包到 kubernetes-1.24.4 目录
[root@rocky-02 ~]# yumdownloader kubelet-1.24.4-0 --resolve --destdir=/data/download/kubernetes-1.24.4/
[root@rocky-02 ~]# yumdownloader kubectl-1.24.4-0 --resolve --destdir=/data/download/kubernetes-1.24.4/
[root@rocky-02 ~]# yumdownloader kubeadm-1.24.4-0 --resolve --destdir=/data/download/kubernetes-1.24.4/
[root@rocky-02 ~]# ls /data/download/kubernetes-1.24.4/
0f2a2afd740d476ad77c508847bad1f559afc2425816c1f2ce4432a62dfe0b9d-kubernetes-cni-1.2.0-0.x86_64.rpm
3f5ba2b53701ac9102ea7c7ab2ca6616a8cd5966591a77577585fde1c434ef74-cri-tools-1.26.0-0.x86_64.rpm
92b115844af8f846dde1c1a8c63c072b103a2f81c092b989cbb2693ffe448eba-kubelet-1.24.4-0.x86_64.rpm
a24e42254b5a14b67b58c4633d29c27370c28ed6796a80c455a65acc813ff374-kubectl-1.28.2-0.x86_64.rpm
ab92f89fd881db751ff6da81a8b2032d0561fc83be5d010028c03e5e878ee2fa-kubeadm-1.24.4-0.x86_64.rpm
conntrack-tools-1.4.7-2.el9.x86_64.rpm
e1cae938e231bffa3618f5934a096bd85372ee9b1293081f5682a22fe873add8-kubelet-1.28.2-0.x86_64.rpm
f915aaf40fbb679d52e88f4a3dffb25b823d0bc263fd403ab3f1c6a0d3e46775-kubectl-1.24.4-0.x86_64.rpm
libnetfilter_cthelper-1.0.0-22.el9.x86_64.rpm
libnetfilter_cttimeout-1.0.0-19.el9.x86_64.rpm
libnetfilter_queue-1.0.5-1.el9.x86_64.rpm
socat-1.7.4.1-5.el9_4.2.x86_64.rpm
# 进入 kubernetes-1.24.4 目录,删除非 1.24.4 版本的 kubectl 与 kubelet(即下面列出的两个 1.28.2 的包)
e1cae938e231bffa3618f5934a096bd85372ee9b1293081f5682a22fe873add8-kubelet-1.28.2-0.x86_64.rpm
a24e42254b5a14b67b58c4633d29c27370c28ed6796a80c455a65acc813ff374-kubectl-1.28.2-0.x86_64.rpm
#将软件包压缩为gz格式
[root@rocky-02 ~]# tar -zcf kubernetes-1.24.4.tar.gz kubernetes-1.24.4
提前拉取安装时所需要的镜像
# 先检查安装时需要哪些镜像,只在一个节点操作
[root@rocky-01 ~]# kubeadm config images list --kubernetes-version=1.24.4
I0717 09:28:14.172155 2178 version.go:255] remote version is much newer: v1.30.2; falling back to: stable-1.24
k8s.gcr.io/kube-apiserver:v1.24.17
k8s.gcr.io/kube-controller-manager:v1.24.17
k8s.gcr.io/kube-scheduler:v1.24.17
k8s.gcr.io/kube-proxy:v1.24.17
k8s.gcr.io/pause:3.7
k8s.gcr.io/etcd:3.5.3-0
k8s.gcr.io/coredns/coredns:v1.8.6
# 拉取镜像
# cat /home/weihu/script/images.sh
#!/bin/bash
images=$(kubeadm config images list --kubernetes-version=1.24.4 | awk -F'/' '{print $NF}')
for i in ${images}
do
docker pull registry.aliyuncs.com/google_containers/$i
#docker tag registry.aliyuncs.com/google_containers/$i kubernetes-register.openlab.cn/google_containers/$i
#docker push kubernetes-register.openlab.cn/google_containers/$i
#docker rmi registry.aliyuncs.com/google_containers/$i
done
#执行脚本文件
sh images.sh
# 将镜像导出
docker save `docker images |grep "registry.aliyuncs.com/google_containers"|awk '{print $1":" $2}'|tr '\n' ' '` -o /data/images.tar
5.2 内网环境安装kubelet、kubeadm和kubectl并导入镜像(所有节点操作)
1 安装 kubelet、kubeadm和kubectl
1 上传离线包到内网服务器节点中,进行解压安装
tar -xf kubernetes-1.24.4.tar.gz
yum localinstall kubernetes-1.24.4/*.rpm -y
2 设置命令的补全功能
yum -y install bash-completion
kubeadm completion bash > /etc/bash_completion.d/kubeadm
kubectl completion bash > /etc/bash_completion.d/kubectl
3 配置 kubelet
1)启用 Cgroup 控制组,用于限制进程的资源使用量,如CPU、内存等(默认)
2)配置kubelet,为其指定cri-dockerd在本地打开的Unix Sock文件的路径,该路径一般默认为“/run/cri-dockerd.sock“,需要说明的是,该配置也可不进行,而是直接在后面的各kubeadm命令上使用“--cri-socket unix:///run/cri-dockerd.sock”选项
cat > /etc/sysconfig/kubelet <<EOF
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd"
KUBELET_KUBEADM_ARGS="--container-runtime=remote --container-runtime-endpoint=/run/cri-dockerd.sock"
EOF
4 设置 kubelet 开机自启动即可,集群初始化后自动启动
systemctl enable kubelet
2 导入镜像
docker load -i /data/images.tar #将镜像包上传,然后导入
docker images #查看
REPOSITORY TAG IMAGE ID CREATED SIZE
registry.aliyuncs.com/google_containers/kube-apiserver v1.24.4 6cab9d1bed1b 23 months ago 130MB
registry.aliyuncs.com/google_containers/kube-proxy v1.24.4 7a53d1e08ef5 23 months ago 110MB
registry.aliyuncs.com/google_containers/kube-scheduler v1.24.4 03fa22539fc1 23 months ago 51MB
registry.aliyuncs.com/google_containers/kube-controller-manager v1.24.4 1f99cb6da9a8 23 months ago 119MB
registry.aliyuncs.com/google_containers/etcd 3.5.3-0 aebe758cef4c 2 years ago 299MB
registry.aliyuncs.com/google_containers/pause 3.7 221177c6082a 2 years ago 711kB
registry.aliyuncs.com/google_containers/coredns v1.8.6 a4ca41631cc7 2 years ago 46.8MB
5.3 部署nginx+keepalived(所有master节点部署)
也可以单独找两台机器来部署nginx+keepalived
如果不做负载均衡只做master高可用可以直接在所有master部署keepalived即可
nginx部署安装(所有master节点配置一样)
# 下载地址
http://nginx.org/download/nginx-1.24.0.tar.gz
# 解压、编译安装(需要做四层负载,编译参数自己定义,但是--with-stream必须加)
tar -xf nginx-1.24.0.tar.gz
cd nginx-1.24.0/
./configure --with-stream --prefix=/usr/local/nginx
make && make install
# 修改nginx配置文件
# 创建日志目录
$ mkdir /var/log/nginx
$ vi /usr/local/nginx/conf/nginx.conf
worker_processes 2;
error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;
events {
worker_connections 10240;
}
stream {
upstream k8s-apiserver {
server 11.0.1.140:6443 weight=5 max_fails=3 fail_timeout=30s;
server 11.0.1.141:6443 weight=5 max_fails=3 fail_timeout=30s;
server 11.0.1.142:6443 weight=5 max_fails=3 fail_timeout=30s;
}
server {
listen 6443;  # 注意:若nginx与kube-apiserver部署在同一台master上会抢占6443端口,通常改用16443(与下文检测脚本的端口一致),并同步调整controlPlaneEndpoint等处的端口
proxy_pass k8s-apiserver;
proxy_connect_timeout 15s;
proxy_timeout 15s;
}
log_format proxy '$remote_addr [$time_local] '
'$protocol $status $bytes_sent $bytes_received '
'$session_time "$upstream_addr" '
'"$upstream_bytes_sent" "$upstream_bytes_received" "$upstream_connect_time"';
access_log /var/log/nginx/access.log proxy;
}
# 配置nginx启动服务文件
$ vi /usr/lib/systemd/system/nginx.service
[Unit]
Description=nginx
After=network.target
[Service]
Type=forking
ExecStart=/usr/local/nginx/sbin/nginx
ExecReload=/usr/local/nginx/sbin/nginx -s reload
ExecStop=/usr/local/nginx/sbin/nginx -s quit
PrivateTmp=true
[Install]
WantedBy=multi-user.target
# 启动并检查nginx
systemctl daemon-reload
systemctl enable nginx.service
systemctl restart nginx.service
systemctl status nginx
keepalived部署(节点配置文件略有不同)
# 下载地址
https://www.keepalived.org/download.html
# 安装依赖
yum install -y gcc openssl-devel libnl* popt-devel
# 解压、编译安装
tar -xf keepalived-2.2.8.tar.gz
cd keepalived-2.2.8
./configure --prefix=/usr/local/keepalived
make && make install
#创建配置文件,修改配置文件,配置文件略有不同,因为这个采用了非抢占模式
## master01配置
mkdir /etc/keepalived/
$ vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id k8s-master01
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
interval 5
weight -60
fall 2
rise 2
}
vrrp_instance VI_1 {
state BACKUP
nopreempt
interface ens160
virtual_router_id 56 # VRRP 路由 ID实例,每个实例是唯一的
priority 100 # 优先级,备服务器设置 90
advert_int 1 # 指定VRRP 心跳包通告间隔时间,默认1秒
authentication {
auth_type PASS
auth_pass 1111
}
# 虚拟IP,hosts解析要对应好
virtual_ipaddress {
11.0.1.150/24
}
track_script {
check_nginx
}
}
## master02配置
$ vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id master2
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
interval 5
weight -60
fall 2
rise 2
}
vrrp_instance VI_1 {
state BACKUP
nopreempt
interface ens160
virtual_router_id 56 # VRRP 路由 ID实例,每个实例是唯一的
priority 90 # 优先级,备服务器设置 90
advert_int 1 # 指定VRRP 心跳包通告间隔时间,默认1秒
authentication {
auth_type PASS
auth_pass 1111
}
# 虚拟IP
virtual_ipaddress {
11.0.1.150/24
}
track_script {
check_nginx
}
}
## master03配置
$ vi /etc/keepalived/keepalived.conf
global_defs {
router_id k8s-master03
}
vrrp_script check_nginx {
script "/etc/keepalived/check_nginx.sh"
interval 5
weight -60
fall 2
rise 2
}
vrrp_instance VI_1 {
state BACKUP
nopreempt
interface ens160
virtual_router_id 56 # VRRP 路由 ID实例,每个实例是唯一的
priority 80 # 优先级,备服务器设置 90
advert_int 1 # 指定VRRP 心跳包通告间隔时间,默认1秒
authentication {
auth_type PASS
auth_pass 1111
}
# 虚拟IP
virtual_ipaddress {
11.0.1.150/24
}
track_script {
check_nginx
}
}
编写检测脚本(所有master节点一样)
$ vi /etc/keepalived/check_nginx.sh
#!/bin/bash
# if check error then repeat check for 12 times, else exit
# 检测次数可以适当调整
err=0
for k in $(seq 1 2)
do
check_code=$(curl -k http://localhost:16443)
if [[ $check_code == "" ]]; then
err=$(expr $err + 1)
sleep 5
continue
else
err=0
break
fi
done
if [[ $err != "0" ]]; then
# if apiserver is down send SIG=1
echo 'nginx error!'
systemctl stop keepalived
exit 1
else
# if apiserver is up send SIG=0
echo 'nginx ok'
fi
chmod +x /etc/keepalived/check_nginx.sh
启动KEEPALIVED并设置开机自启
systemctl start keepalived
systemctl enable keepalived
5.4 初始化k8s集群(第一个控制节点执行k8s-master01)
两种方式,一种是通过命令的方式,另一种是通过yaml文件
############################ 初始化方式一 ############################
####注意:修改 --apiserver-advertise-address 为自己的master地址
kubeadm init --kubernetes-version=1.24.4 \
--apiserver-advertise-address=11.0.1.140 \
--image-repository registry.aliyuncs.com/google_containers \
--control-plane-endpoint k8sapi \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=Swap \
--cri-socket=unix:///var/run/cri-dockerd.sock
## 命令中的各选项简单说明如下:
#--image-repository:指定要使用的镜像仓库,默认为gcr.io;
#--kubernetes-version:kubernetes程序组件的版本号,它必须要与安装的kubelet程序包的版本号相同;
#--control-plane-endpoint:控制平面的固定访问端点,可以是IP地址或DNS名称,会被用于集群管理员及集群组件的kubeconfig配置文件的API Server的访问地址;单控制平面部署时可以不使用该选项;
#--pod-network-cidr:Pod网络的地址范围,其值为CIDR格式的网络地址,通常,Flannel网络插件的默认为10.244.0.0/16,Project Calico插件的默认值为192.168.0.0/16;
#--service-cidr:Service的网络地址范围,其值为CIDR格式的网络地址,默认为10.96.0.0/12;通常,仅Flannel一类的网络插件需要手动指定该地址;
#--apiserver-advertise-address:apiserver通告给其他组件的IP地址,一般应该为Master节点的用于集群内部通信的IP地址,0.0.0.0表示节点上所有可用地址;
#--token-ttl:共享令牌(token)的过期时长,默认为24小时,0表示永不过期;为防止不安全存储等原因导致的令牌泄露危及集群安全,建议为其设定过期时长。未设定该选项时,在token过期后,若期望再向集群中加入其它节点,可以使用如下命令重新创建token,并生成节点加入命令 : kubeadm token create --print-join-command
# --upload-certs 将证书上传到集群中,以便在后续加入其他节点时可以自动分发证书,简化节点加入的过程,多master节点必须加的参数
# 需要注意的是,若各节点未禁用Swap设备,还需要附加选项“--ignore-preflight-errors=Swap”,从而让kubeadm忽略该错误设定
############################ 初始化方式二(建议) ############################
kubeadm也可通过配置文件加载配置,以定制更丰富的部署选项。以下是个符合前述命令设定方式的使用示例,不过,它明确定义了kubeProxy的模式为ipvs,并支持通过修改imageRepository的值修改获取系统镜像时使用的镜像仓库
#可以通过下面这个命令生成一个默认的初始化yaml文件进行修改
kubeadm config print init-defaults > kubeadm-config.yml
# 修改后的初始化yaml文件
[root@rocky-01 ~]# cat /home/weihu/yaml/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
# 这里的地址即为初始化的控制平面第一个节点的IP地址;
advertiseAddress: 11.0.1.140
bindPort: 6443
nodeRegistration:
#注意:按需调整这里使用的 cri socket 文件的路径
criSocket: unix:///var/run/cri-dockerd.sock
imagePullPolicy: IfNotPresent
# 第一个控制平面节点的主机名称;
name: k8s-master01
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
local:
dataDir: /var/lib/etcd
#设置拉取镜像源,按需修改
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
# 版本号要与部署的目标版本保持一致;
kubernetesVersion: 1.24.4
# 控制平面的接入端点,我们这里选择适配到k8sapi这一域名上;
controlPlaneEndpoint: k8sapi:6443
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
podSubnet: 10.244.0.0/16
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# 用于配置kube-proxy上为Service指定的代理模式,默认为iptables;
mode: ipvs
# 执行yaml文件进行初始化
kubeadm init --config=kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
初始化过程
[root@k8s-master01 yaml]# kubeadm init --config=kubeadm-config.yaml --upload-certs | tee kubeadm-init.log
W0717 13:01:05.313175 6473 initconfiguration.go:306] error unmarshaling configuration schema.GroupVersionKind{Group:"kubeadm.k8s.io", Version:"v1beta3", Kind:"ClusterConfiguration"}: strict decoding error: unknown field "dns.type"
[init] Using Kubernetes version: v1.24.4
[preflight] Running pre-flight checks
[WARNING SystemVerification]: missing optional cgroups: blkio
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-master01 k8sapi kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 11.0.1.140]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [k8s-master01 localhost] and IPs [11.0.1.140 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [k8s-master01 localhost] and IPs [11.0.1.140 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 9.005118 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
dbb52071e0a302667d49e5de959fe1ce9b50dde0bbde200ec93d43761254e57a
[mark-control-plane] Marking the node k8s-master01 as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node k8s-master01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join k8sapi:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:d57753c43a98fa6c1a150bc5a5959baa1574993932089aadf027e4f5f7806615 \
--control-plane --certificate-key dbb52071e0a302667d49e5de959fe1ce9b50dde0bbde200ec93d43761254e57a
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join k8sapi:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:d57753c43a98fa6c1a150bc5a5959baa1574993932089aadf027e4f5f7806615
[root@k8s-master01 yaml]#
初始化失败后处理办法
# 方法一
kubeadm reset
#根据提示输入y回车
#方法二
1. 删除/etc/kubernetes/文件夹下的所有文件
2. 删除$HOME/.kube文件夹
3. 删除/var/lib/etcd文件夹
rm -rf /etc/kubernetes/*
rm -rf ~/.kube/*
rm -rf /var/lib/etcd/*
systemctl stop kubelet
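A hedged supplement to the reset steps above: kubeadm reset does not clean up CNI configuration or iptables/IPVS rules, and with cri-dockerd it may need the CRI socket passed explicitly:
kubeadm reset --cri-socket unix:///var/run/cri-dockerd.sock
rm -rf /etc/cni/net.d
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
ipvsadm --clear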
5.5 网络插件部署(k8s-master01执行)
calico
离线安装calico网络组件
1 下载calico离线包
# 首先去github全局搜索calico,点击进入项目,在Releases下下载自己需要的calico版本的离线包(文章以3.26.1版本为例)
https://github.com/projectcalico/calico
2 解压calico离线包并将所需文件上传至服务器
可以先传至那台能联网的服务器中解压,然后只需要将用到的yaml文件和离线镜像传至内网服务器中
(我们解压calico的离线包后会得到很多文件,并不是全部需要)
[root@k8s-node01 ~]# ls release-v3.26.1.tgz
release-v3.26.1.tgz
[root@k8s-node01 ~]# tar -xf release-v3.26.1.tgz
[root@k8s-node01 ~]# ls release-v3.26.1/
bin images manifests
# 首先将calico.yaml文件传至内网服务器(master的一个节点就行)
[root@web release-v3.26.1]# scp manifests/calico.yaml root@IP:
# 然后使用cat calico.yaml |grep image:命令查看calico所需的镜像
[root@k8s-master01 ~]# cat calico.yaml |grep image:
image: docker.io/calico/cni:v3.26.1
image: docker.io/calico/cni:v3.26.1
image: docker.io/calico/node:v3.26.1
image: docker.io/calico/node:v3.26.1
image: docker.io/calico/kube-controllers:v3.26.1
# 这里显示安装calico需要三个镜像,去解压的离线包images文件夹中找到对应的三个离线镜像包文件,这里对应的分别是calico-cni.tar,calico-kube-controllers.tar和calico-node.tar三个离线包,将这三个离线镜像上传至服务器(所有节点)
[root@web release-v3.26.1]# scp images/calico-cni.tar root@IP:/home/weihu/image
[root@web release-v3.26.1]# scp images/calico-kube-controllers.tar root@IP:/home/weihu/image
[root@web release-v3.26.1]# scp images/calico-node.tar root@IP:/home/weihu/image
3 导入镜像
[root@k8s-master01 image]# docker load -i calico-cni.tar
[root@k8s-master01 image]# docker load -i calico-node.tar
[root@k8s-master01 image]# docker load -i calico-kube-controllers.tar
[root@k8s-master01 ~]# docker images |grep calico
calico/kube-controllers v3.26.1 1919f2787fa7 13 months ago 75.1MB
calico/cni v3.26.1 9dee260ef7f5 13 months ago 210MB
calico/node v3.26.1 8065b798a4d6 13 months ago 246MB
4 安装calico
[root@k8s-master01 ~]# kubectl apply -f calico.yaml
[root@k8s-master01 ~]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-79dcc699f8-45lg7 1/1 Running 0 24m
kube-system calico-node-jllch 1/1 Running 0 24m
kube-system calico-node-mdgtc 1/1 Running 0 24m
kube-system coredns-7f74c56694-r9ztf 1/1 Running 0 4h33m
kube-system coredns-7f74c56694-zqtzl 1/1 Running 0 4h33m
kube-system etcd-k8s-master01 1/1 Running 0 4h33m
kube-system kube-apiserver-k8s-master01 1/1 Running 0 4h33m
kube-system kube-controller-manager-k8s-master01 1/1 Running 0 4h33m
kube-system kube-proxy-c676w 1/1 Running 0 4h33m
kube-system kube-proxy-rf4bv 1/1 Running 0 3h1m
kube-system kube-scheduler-k8s-master01 1/1 Running 0 4h33m
# 查看集群状态是否为Ready
flannel
https://github.com/flannel-io/flannel/releases/
5.6 加入master节点和node节点
注意!master和node的join命令不一样,初始化输出的中间部分为加入master节点的命令,最后部分为加入node节点的命令,可从初始化时的输出或kubeadm-init.log中获取命令
以下操作在master02、03节点使用root操作
# 执行如下命令,加入k8s集群,成为master节点(这是我安装的指令,要根据自己安装的进行加入)
kubeadm join k8sapi:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:d57753c43a98fa6c1a150bc5a5959baa1574993932089aadf027e4f5f7806615 \
--control-plane --certificate-key dbb52071e0a302667d49e5de959fe1ce9b50dde0bbde200ec93d43761254e57a
以下操作在所有node节点使用root操作
# 执行如下命令,加入k8s集群,成为node节点
kubeadm join k8sapi:6443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:d57753c43a98fa6c1a150bc5a5959baa1574993932089aadf027e4f5f7806615
token 24小时有效,如过期或扩充node节点,通过以下命令创建新的token
kubeadm token create --print-join-command
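A hedged note for this cri-dockerd setup: if kubeadm join complains about multiple CRI endpoints on the node, append the socket to the printed join command (token/hash below are placeholders):
kubeadm join k8sapi:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash> --cri-socket unix:///var/run/cri-dockerd.sock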
6 集群状态检查(k8s-master01操作)
检查集群状态
# 重点查看STATUS内容为Ready时,则说明集群状态正常
kubectl get node
NAME STATUS ROLES AGE VERSION
k8smaster01 Ready master 20m v1.24.4
k8smaster02 Ready master 25m v1.24.4
k8smaster03 Ready master 25m v1.24.4
k8snode01 Ready <none> 26m v1.24.4
k8snode02 Ready <none> 27m v1.24.4
k8snode03 Ready <none> 28m v1.24.4
检查pod状态
# 查看STATUS内容是否有非Running状态的,如有则需要检查pod异常原因
kubectl get pod -A
7 组件部署
1 性能指标监控模块
# 下载地址
https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.6.0/components.yaml
## 修改 metrics-server 启动参数
# metrics-server 会请求每台节点的 kubelet 接口来获取监控数据,接口通过 HTTPS 暴露,但 Kubernetes 节点的 kubelet 使用的是自签证书,若 metrics-server 直接请求 kubelet 接口,将产生证书校验失败的错误,因此需要在 components.yaml 文件中加上 --kubelet-insecure-tls 启动参数
# 且由于 metrics-server 官方镜像仓库存储在 k8s.gcr.io ,国内可能无法直接拉取,您可以自行同步到 CCR 或使用已同步的镜像 registry.aliyuncs.com/google_containers/metrics-server:v0.6.1。
# components.yaml 文件修改示例如下:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=443
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls # 加上该启动参数
image: registry.aliyuncs.com/google_containers/metrics-server:v0.6.1 # 国内集群,请替换成这个镜像或者是其他仓库
完整yaml文件内容
cat > /home/app/metrics-v0.6.yaml <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
rules:
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- apiGroups:
- ""
resources:
- pods
- nodes
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
ports:
- name: https
port: 443
protocol: TCP
targetPort: https
selector:
k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: metrics-server
strategy:
rollingUpdate:
maxUnavailable: 0
template:
metadata:
labels:
k8s-app: metrics-server
spec:
containers:
- args:
- --cert-dir=/tmp
- --secure-port=4443
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
- --metric-resolution=15s
- --kubelet-insecure-tls
image: registry.aliyuncs.com/google_containers/metrics-server:v0.6.1
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 3
httpGet:
path: /livez
port: https
scheme: HTTPS
periodSeconds: 10
name: metrics-server
ports:
- containerPort: 4443
name: https
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /readyz
port: https
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 10
resources:
requests:
cpu: 100m
memory: 200Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
volumeMounts:
- mountPath: /tmp
name: tmp-dir
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
volumes:
- emptyDir: {}
name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
labels:
k8s-app: metrics-server
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
groupPriorityMinimum: 100
insecureSkipTLSVerify: true
service:
name: metrics-server
namespace: kube-system
version: v1beta1
versionPriority: 100
EOF
应用
# 创建
kubectl apply -f /home/app/metrics-v0.6.yaml
# 查看pod状态
kubectl get pods -n kube-system| egrep 'NAME|metrics-server'
# 查看pod利用率
kubectl top pods -n kube-system
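Node-level usage can be checked the same way once metrics-server is Running:
kubectl top nodes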
2 Dashboard
# 下载地址
https://github.com/kubernetes/dashboard
# 页面访问yaml路径,可以直接复制粘贴下来
https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml
# 需要修改的地方
# 1 发布服务的类型是ClusterIP ,外面的机器不能访问,不便于我们通过浏览器访问,因此需要改成NodePort型的
---
kind: Service
apiVersion: v1
.........
spec:
ports:
- port: 443
targetPort: 8443
nodePort: 30000 #添加 nodePort 的端口
type: NodePort #修改为 NodePort 类型
selector:
k8s-app: kubernetes-dashboard
---
# 2 设置令牌失效时间,修改镜像拉取策略
kind: Deployment
......
containers:
- name: kubernetes-dashboard
image: kubernetesui/dashboard:v2.7.0
imagePullPolicy: IfNotPresent # 修改为"IfNotPresent"表示如果本地不存在所需的镜像,才会从镜像仓库拉取
ports:
- containerPort: 8443
protocol: TCP
args:
- --auto-generate-certificates
- --namespace=kubernetes-dashboard
- --token-ttl=86400 #设置令牌失效时间为24小时
修改后的yaml文件
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: Namespace
metadata:
name: kubernetes-dashboard
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
ports:
- port: 443
targetPort: 8443
nodePort: 30000
type: NodePort
selector:
k8s-app: kubernetes-dashboard
---
apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque
---
apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
data:
csrf: ""
---
apiVersion: v1
kind: Secret
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque
---
kind: ConfigMap
apiVersion: v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
# Allow Dashboard to get, update and delete Dashboard exclusive secrets.
- apiGroups: [""]
resources: ["secrets"]
resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"]
verbs: ["get", "update", "delete"]
# Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map.
- apiGroups: [""]
resources: ["configmaps"]
resourceNames: ["kubernetes-dashboard-settings"]
verbs: ["get", "update"]
# Allow Dashboard to get metrics.
- apiGroups: [""]
resources: ["services"]
resourceNames: ["heapster", "dashboard-metrics-scraper"]
verbs: ["proxy"]
- apiGroups: [""]
resources: ["services/proxy"]
resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"]
verbs: ["get"]
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
rules:
# Allow Metrics Scraper to get metrics from the Metrics server
- apiGroups: ["metrics.k8s.io"]
resources: ["pods", "nodes"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kubernetes-dashboard
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kubernetes-dashboard
subjects:
- kind: ServiceAccount
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: kubernetes-dashboard
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: kubernetes-dashboard
template:
metadata:
labels:
k8s-app: kubernetes-dashboard
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: kubernetes-dashboard
image: kubernetesui/dashboard:v2.7.0
imagePullPolicy: IfNotPresent
ports:
- containerPort: 8443
protocol: TCP
args:
- --auto-generate-certificates
- --namespace=kubernetes-dashboard
- --token-ttl=86400
# Uncomment the following line to manually specify Kubernetes API server Host
# If not specified, Dashboard will attempt to auto discover the API server and connect
# to it. Uncomment only if the default does not work.
# - --apiserver-host=http://my-address:port
volumeMounts:
- name: kubernetes-dashboard-certs
mountPath: /certs
# Create on-disk volume to store exec logs
- mountPath: /tmp
name: tmp-volume
livenessProbe:
httpGet:
scheme: HTTPS
path: /
port: 8443
initialDelaySeconds: 30
timeoutSeconds: 30
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
volumes:
- name: kubernetes-dashboard-certs
secret:
secretName: kubernetes-dashboard-certs
- name: tmp-volume
emptyDir: {}
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
---
kind: Service
apiVersion: v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
ports:
- port: 8000
targetPort: 8000
selector:
k8s-app: dashboard-metrics-scraper
---
kind: Deployment
apiVersion: apps/v1
metadata:
labels:
k8s-app: dashboard-metrics-scraper
name: dashboard-metrics-scraper
namespace: kubernetes-dashboard
spec:
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
k8s-app: dashboard-metrics-scraper
template:
metadata:
labels:
k8s-app: dashboard-metrics-scraper
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: dashboard-metrics-scraper
image: kubernetesui/metrics-scraper:v1.0.8
ports:
- containerPort: 8000
protocol: TCP
livenessProbe:
httpGet:
scheme: HTTP
path: /
port: 8000
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- mountPath: /tmp
name: tmp-volume
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsUser: 1001
runAsGroup: 2001
serviceAccountName: kubernetes-dashboard
nodeSelector:
"kubernetes.io/os": linux
# Comment the following tolerations if Dashboard must not be deployed on master
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
volumes:
- name: tmp-volume
emptyDir: {}
所需要的镜像
grep -w "image" dashboard.yaml
image: kubernetesui/dashboard:v2.7.0
image: kubernetesui/metrics-scraper:v1.0.8
部署
# 创建
kubectl apply -f dashboard.yaml
# 查看pod,svc是否正常运行
kubectl -n kubernetes-dashboard get pod,svc
NAME READY STATUS RESTARTS AGE
pod/dashboard-metrics-scraper-8c47d4b5d-mpt4g 1/1 Running 0 19m
pod/kubernetes-dashboard-64d795fc9c-j6tp5 1/1 Running 0 19m
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/dashboard-metrics-scraper ClusterIP 10.104.252.218 <none> 8000/TCP 19m
service/kubernetes-dashboard NodePort 10.97.10.193 <none> 443:30000/TCP 19m
# 浏览器访问Dashboard登录
https://MasterIP:30000
想要访问dashboard服务,就要有访问权限,创建kubernetes-dashboard管理员角色
# 需要生成一个kubernetes-dashboard token来登陆这个系统,权限很低
kubectl -n kubernetes-dashboard create token kubernetes-dashboard
## 创建service account并绑定默认cluster-admin管理员集群角色。
# 创建用户
kubectl create serviceaccount dashboard-admin -n kubernetes-dashboard
# 用户授权
kubectl create clusterrolebinding dashboard-admin --clusterrole=cluster-admin --serviceaccount=kubernetes-dashboard:dashboard-admin
# 获取用户Token
kubectl create token dashboard-admin -n kubernetes-dashboard
# 使用输出的token登录Dashboard。
3 Kuboard
https://kuboard.cn/install/v3/install-in-k8s.html
8 k8s常用命令总结
################# 查看资源类型 #################
kubectl api-resources
################# 了解pod运行状况 #################
#打印pod完整的资源规范,通过status字段了解
kubectl get TYPE NAME -o yaml|json
~# kubectl -n test get pod nginx-deployment-544dc8b7c4-4h66p -o yaml
~# kubectl -n test get pod nginx-deployment-544dc8b7c4-4h66p -o json
#打印pod资源的详细状态
kubectl describe TYPE NAME
~# kubectl -n test describe pod nginx-deployment-544dc8b7c4-4h66p
#获取pod中容器应用的日志
kubectl logs [-f] [-p] (POD | TYPE/NAME) [-c CONTAINER]
~# kubectl -n test logs nginx-deployment-544dc8b7c4-4h66p
################# 增加和删除污点 #################
# 查看节点Taints
kubectl describe node NODE_NAME | grep Taints
~# kubectl describe nodes k8s-master |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
# 删除节点Taints(后面 "-" 一定要加)
kubectl taint node NODE_NAME 污点名称-
~# kubectl taint node k8s-master01 node-role.kubernetes.io/master:NoSchedule-
# 增加节点 Taints
kubectl taint node [node] key=value:[effect] # value 可以省略
#其中[effect] 可取值: [ NoSchedule | PreferNoSchedule | NoExecute ]
1)NoSchedule: 一定不能被调度
2)PreferNoSchedule: 尽量不要调度
3)NoExecute: 不仅不会调度, 还会驱逐Node上已有的Pod
~# kubectl taint node k8s-node01 node-role.kubernetes.io/node1:NoSchedule
################# 标签 #################
##增加标签
kubectl label -n 命名空间 资源类型 资源名称 标签键名=键值
~# kubectl label node k8s-node01 node_type=work01
##删除标签
kubectl label -n 命名空间 资源类型 资源名称 标签键名-(减号不能忽略)
~# kubectl label node k8s-node01 node_type-
##修改标签
kubectl label -n 命名空间 资源类型 资源名称 标签键名=新的键值 --overwrite
~# kubectl label node k8s-node01 node_type=node01 --overwrite
##查询标签
kubectl get -n 命名空间 资源类型 --show-labels [-l 标签键名]或[-l 标签键名=键值](筛选)
~# kubectl get node --show-labels
~# kubectl get node --show-labels |grep node_type
################# 指定pod调度 #################
#1) 指定nodeName
# pod.spec.nodeName 将 Pod 直接调度到指定的 Node 节点上,会跳过 Scheduler 的调度策略,该匹配规则是强制匹配
apiVersion: apps/v1
kind: Deployment
metadata:
name: myapp
spec:
......
spec:
nodeName: node01 #指定了nodeName为node01节点
containers:
- name: myapp
image: soscscs/myapp:v1
.....
# 2) 指定nodeSelector
# pod.spec.nodeSelector:通过 kubernetes 的 label-selector 机制选择节点,由调度器调度策略匹配 label,然后调度 Pod 到目标节点,该匹配规则属于强制约束
apiVersion: apps/v1
kind: Deployment
metadata:
name: myapp
spec:
......
spec:
nodeSelector:
node_type: work01 ##指定调度到拥有这个标签的node节点上
containers:
- name: myapp
image: soscscs/myapp:v1
.....
################# 创建ConfigMap对象(两种) #################
# 一 命令式命令
字面量:kubectl create configmap NAME --from-literal=key1=value1
从文件加载:kubectl create configmap NAME --from-file=[key=]/PATH/TO/FILE
# 如果不指定key,默认会将file文件名当作key,后面可以跟多个--from-file
从目录加载: kubectl create configmap NAME --from-file=/PATH/TO/DIR/
# 从目录加载不能指定key,会将目录下所有的文件名当作各自的key
# 二 通过配置文件
提示:基于文件内容生成时,可以使用命令式命令以dry-run模式生成并保存
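A hedged example of the dry-run tip above (the ConfigMap name and source file are placeholders):
kubectl create configmap demo-conf --from-file=./app.conf --dry-run=client -o yaml > demo-conf.yaml
kubectl apply -f demo-conf.yaml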
########################## 删除 ######################
# 强制删除pod
kubectl delete pods prometheus-7fcfcb9f89-qkkf7 --grace-period=0 --force
######################### 拷贝 ###########################
# 拷贝容器内的文件或目录到本地
kubectl cp default/venus-registry-web-8cd94fc99-fws4b:demo.txt demo.txt
kubectl cp default/venus-registry-web-8cd94fc99-fws4b:/home/xxx /home/yyy
# 拷贝本地文件或目录到容器内
kubectl cp demo.txt default/venus-registry-web-8cd94fc99-fws4b:demo.txt
kubectl cp /home/yyy default/venus-registry-web-8cd94fc99-fws4b:/home/xxx
#################### 集群管理命令 ###########################
# 标记k8s-node节点不可调度
kubectl cordon k8s-node
# 标记k8s-node节点可调度
kubectl uncordon k8s-node
# 排除k8s-node节点,准备进行维护
kubectl drain k8s-node
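In practice drain usually needs extra flags to skip DaemonSet-managed pods and allow deleting emptyDir data; a hedged example:
kubectl drain k8s-node --ignore-daemonsets --delete-emptydir-data
# After maintenance, make the node schedulable again with: kubectl uncordon k8s-node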