Deploying k8s with Ansible (kubeasz)
1. k8s cluster environment setup
1.1 Node planning
1.1.1 Plan the IP addresses
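Based on the inventory used later in this document, the node plan is roughly as follows (roles taken from the hosts file below; the Harbor note comes from the /etc/hosts entry added in a later step):
192.168.1.200  etcd
192.168.1.201  etcd
192.168.1.202  kube_master
192.168.1.205  kube_master (also resolves harbor.luohw.net in a later step)
192.168.1.206  kube_node
192.168.1.207  kube_node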
1.1.2 Set the hostnames and the hosts file
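A minimal sketch for this step (the hostnames here are illustrative, not from the original environment):
hostnamectl set-hostname k8s-master1    # run on each node with its own name
cat >> /etc/hosts <<EOF
192.168.1.200 k8s-etcd1
192.168.1.201 k8s-etcd2
192.168.1.202 k8s-master1
192.168.1.205 k8s-master2
192.168.1.206 k8s-node1
192.168.1.207 k8s-node2
EOF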
1.2 Configure SSH key distribution
ssh-keygen
apt install sshpass -y
#!/bin/bash
# list of target hosts
IP="
192.168.1.200
192.168.1.201
192.168.1.202
192.168.1.205
192.168.1.206
192.168.1.207
"
for node in ${IP};do
  sshpass -p 123456 ssh-copy-id ${node} -o StrictHostKeyChecking=no
  echo "${node} key distribution finished"
  ssh ${node} ln -sv /usr/bin/python3 /usr/bin/python
  echo "${node} /usr/bin/python3 symlink created"
done
1.3 Download the kubeasz project and components
On the deploy node:
apt install ansible git -y
wget https://github.com/easzlab/kubeasz/releases/download/3.3.1/ezdown
chmod +x ezdown
./ezdown -D    # download the binaries and images
root@ubuntu20:~# ll /etc/kubeasz/
total 108
drwxrwxr-x 12 root root 224 Apr 1 14:17 ./
drwxr-xr-x 107 root root 8192 Apr 1 14:17 ../
drwxrwxr-x 3 root root 23 Jul 3 2022 .github/
-rw-rw-r-- 1 root root 301 Jul 3 2022 .gitignore
-rw-rw-r-- 1 root root 5058 Jul 3 2022 README.md
-rw-rw-r-- 1 root root 20304 Jul 3 2022 ansible.cfg
drwxr-xr-x 3 root root 4096 Apr 1 14:17 bin/
drwxrwxr-x 8 root root 94 Jul 3 2022 docs/
drwxr-xr-x 2 root root 230 Apr 1 14:27 down/
drwxrwxr-x 2 root root 70 Jul 3 2022 example/
-rwxrwxr-x 1 root root 25012 Jul 3 2022 ezctl*
-rwxrwxr-x 1 root root 25266 Jul 3 2022 ezdown*
drwxrwxr-x 10 root root 145 Jul 3 2022 manifests/
drwxrwxr-x 2 root root 322 Jul 3 2022 pics/
drwxrwxr-x 2 root root 4096 Jul 3 2022 playbooks/
drwxrwxr-x 22 root root 323 Jul 3 2022 roles/
drwxrwxr-x 2 root root 48 Jul 3 2022 tools/
1.4 Customize the hosts file
cd /etc/kubeasz
./ezctl --help
./ezctl new k8s-cluster1    # create the cluster definition
Edit the configuration and customize the generated hosts file:
vim /etc/kubeasz/clusters/k8s-cluster1/hosts
[etcd]
192.168.1.200
192.168.1.201
# master node(s)
[kube_master]
192.168.1.202
192.168.1.205
# work node(s)
[kube_node]
192.168.1.206
192.168.1.207
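Besides the host groups, the same hosts file carries the cluster-wide settings. The variable names below exist in the kubeasz hosts template; the values are assumptions, except SERVICE_CIDR and CLUSTER_DNS_DOMAIN, which match the CoreDNS Service IP (10.100.0.2) and Corefile used later:
CLUSTER_NETWORK="calico"
SERVICE_CIDR="10.100.0.0/16"
CLUSTER_CIDR="10.200.0.0/16"         # pod network, assumed value
NODE_PORT_RANGE="30000-62767"        # assumed value
CLUSTER_DNS_DOMAIN="cluster.local"
bin_dir="/usr/local/bin"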
Edit the config.yml file:
root@ubuntu20:/etc/kubeasz/bin# cat /etc/kubeasz/clusters/k8s-cluster1/config.yml
############################
# prepare
############################
# optionally install system packages offline (offline|online)
INSTALL_SOURCE: "online"
# optional OS security hardening, see github.com/dev-sec/ansible-collection-hardening
OS_HARDEN: false
############################
# role:deploy
############################
# default: ca will expire in 100 years
# default: certs issued by the ca will expire in 50 years
CA_EXPIRY: "876000h"
CERT_EXPIRY: "438000h"
# kubeconfig parameters
CLUSTER_NAME: "cluster1"
CONTEXT_NAME: "context-{{ CLUSTER_NAME }}"
# k8s version
K8S_VER: "1.24.2"
############################
# role:etcd
############################
# using a separate wal directory avoids disk I/O contention and improves performance
ETCD_DATA_DIR: "/var/lib/etcd"
ETCD_WAL_DIR: ""
############################
# role:runtime [containerd,docker]
############################
# ------------------------------------------- containerd
# [.] enable the container registry mirror
ENABLE_MIRROR_REGISTRY: true
# [containerd] base (pause) container image
SANDBOX_IMAGE: "harbor.luohw.net/baseimage/pause:3.7"
# [containerd] container persistent storage directory
CONTAINERD_STORAGE_DIR: "/var/lib/containerd"
# ------------------------------------------- docker
# [docker] container storage directory
DOCKER_STORAGE_DIR: "/var/lib/docker"
# [docker] enable the RESTful API
ENABLE_REMOTE_API: false
# [docker] trusted HTTP (insecure) registries
INSECURE_REG: '["http://easzlab.io.local:5000","harbor.luohw.net"]'
############################
# role:kube-master
############################
# extra hosts for the k8s master certificates; multiple IPs and domains can be added (e.g. a public IP and domain)
MASTER_CERT_HOSTS:
- "192.168.1.188"
- "192.168.1.189"
- "192.168.1.190"
- "192.168.1.191"
- "api.luohw.net"
- "k8s.easzlab.io"
#- "www.test.com"
# mask length of the pod network on each node (determines the maximum number of pod IPs per node)
# if flannel runs with --kube-subnet-mgr, it reads this setting to assign a pod subnet to each node
# https://github.com/coreos/flannel/issues/847
NODE_CIDR_LEN: 24
############################
# role:kube-node
############################
# kubelet root directory
KUBELET_ROOT_DIR: "/var/lib/kubelet"
# maximum number of pods per node
MAX_PODS: 500
# reserve resources for kube components (kubelet, kube-proxy, dockerd, etc.)
# see templates/kubelet-config.yaml.j2 for the actual values
KUBE_RESERVED_ENABLED: "no"
# upstream k8s does not recommend enabling system-reserved lightly, unless long-term monitoring has shown you the system's resource usage;
# the reservation should also be increased as the system keeps running, see templates/kubelet-config.yaml.j2
# the defaults assume a 4c/8g VM with a minimal set of system services; on high-end physical machines the reservation can be increased
# note that apiserver etc. briefly use a lot of resources during cluster installation, so reserving at least 1 GB of memory is recommended
SYS_RESERVED_ENABLED: "no"
############################
# role:network [flannel,calico,cilium,kube-ovn,kube-router]
############################
# ------------------------------------------- flannel
# [flannel] flannel backend: "host-gw", "vxlan", etc.
FLANNEL_BACKEND: "vxlan"
DIRECT_ROUTING: false
# [flannel] flanneld_image: "quay.io/coreos/flannel:v0.10.0-amd64"
flannelVer: "v0.15.1"
flanneld_image: "easzlab.io.local:5000/easzlab/flannel:{{ flannelVer }}"
# ------------------------------------------- calico
# [calico] setting CALICO_IPV4POOL_IPIP: "off" can improve network performance; see docs/setup/calico.md for the constraints
CALICO_IPV4POOL_IPIP: "Always"
# [calico] host IP used by calico-node; BGP peers are established over this address; it can be set manually or auto-detected
IP_AUTODETECTION_METHOD: "can-reach={{ groups['kube_master'][0] }}"
# [calico] calico network backend: bird, vxlan, none
CALICO_NETWORKING_BACKEND: "bird"
# [calico] whether calico uses route reflectors
# enabling this is recommended for clusters with more than 50 nodes
CALICO_RR_ENABLED: false
# CALICO_RR_NODES: the nodes acting as route reflectors; defaults to the cluster master nodes if unset
# CALICO_RR_NODES: ["192.168.1.1", "192.168.1.2"]
CALICO_RR_NODES: []
# [calico] supported calico versions: [v3.3.x] [v3.4.x] [v3.8.x] [v3.15.x]
calico_ver: "v3.19.4"
# [calico] calico major.minor version
calico_ver_main: "{{ calico_ver.split('.')[0] }}.{{ calico_ver.split('.')[1] }}"
# ------------------------------------------- cilium
# [cilium] image version
cilium_ver: "1.11.6"
cilium_connectivity_check: true
cilium_hubble_enabled: false
cilium_hubble_ui_enabled: false
# ------------------------------------------- kube-ovn
# [kube-ovn] node for the OVN DB and OVN control plane, defaults to the first master node
OVN_DB_NODE: "{{ groups['kube_master'][0] }}"
# [kube-ovn] offline image tarball version
kube_ovn_ver: "v1.5.3"
# ------------------------------------------- kube-router
# [kube-router] public clouds have restrictions and ipinip generally needs to stay enabled; in a self-hosted environment this can be set to "subnet"
OVERLAY_TYPE: "full"
# [kube-router] NetworkPolicy support switch
FIREWALL_ENABLE: true
# [kube-router] kube-router image version
kube_router_ver: "v0.3.1"
busybox_ver: "1.28.4"
############################
# role:cluster-addon
############################
# install coredns automatically
dns_install: "no"
corednsVer: "1.9.3"
ENABLE_LOCAL_DNS_CACHE: false
dnsNodeCacheVer: "1.21.1"
# local dns cache address
LOCAL_DNS_CACHE: "169.254.20.10"
# install metrics-server automatically
metricsserver_install: "no"
metricsVer: "v0.5.2"
# install dashboard automatically
dashboard_install: "no"
dashboardVer: "v2.5.1"
dashboardMetricsScraperVer: "v1.0.8"
# install prometheus automatically
prom_install: "no"
prom_namespace: "monitor"
prom_chart_ver: "35.5.1"
# install nfs-provisioner automatically
nfs_provisioner_install: "no"
nfs_provisioner_namespace: "kube-system"
nfs_provisioner_ver: "v4.0.2"
nfs_storage_class: "managed-nfs-storage"
nfs_server: "192.168.1.10"
nfs_path: "/data/nfs"
# install network-check automatically
network_check_enabled: false
network_check_schedule: "*/5 * * * *"
############################
# role:harbor
############################
# harbor version, full version string
HARBOR_VER: "v2.1.3"
HARBOR_DOMAIN: "harbor.easzlab.io.local"
HARBOR_TLS_PORT: 8443
# if set 'false', you need to put certs named harbor.pem and harbor-key.pem in directory 'down'
HARBOR_SELF_SIGNED_CERT: true
# install extra component
HARBOR_WITH_NOTARY: false
HARBOR_WITH_TRIVY: false
HARBOR_WITH_CLAIR: false
HARBOR_WITH_CHARTMUSEUM: true
On the management node, upload the pause image to the Harbor registry:
mkdir /etc/docker/certs.d/harbor.luohw.net
scp /apps/harbor/certs/luohw.com.crt 192.168.1.200:/etc/docker/certs.d/harbor.luohw.net/
docker login harbor.luohw.net
docker tag 221177c6082a harbor.luohw.net/baseimage/pause:3.7
docker push harbor.luohw.net/baseimage/pause:3.7
Configure name resolution for Harbor on all nodes:
echo "192.168.1.205 harbor.luohw.net" >> /etc/hosts
Run the initialization:
./ezctl setup k8s-cluster1 01    # prepare the CA and base system settings
Deploy the etcd cluster:
./ezctl setup k8s-cluster1 02    # deploy the etcd cluster
Verify the etcd service on each etcd node:
root@ubuntu20:~# export NODE_IPS="192.168.1.200 192.168.1.201"
root@ubuntu20:~#
root@ubuntu20:~# for ip in ${NODE_IPS}; do ETCDCTL_API=3 /usr/local/bin/etcdctl --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem --key=/etc/kubernetes/ssl/etcd-key.pem endpoint health; done
https://192.168.1.200:2379 is healthy: successfully committed proposal: took = 20.090962ms
https://192.168.1.201:2379 is healthy: successfully committed proposal: took = 17.58472ms
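Optionally, check the member list the same way (same certificates as above):
ETCDCTL_API=3 /usr/local/bin/etcdctl --endpoints=https://192.168.1.200:2379 \
  --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem \
  --key=/etc/kubernetes/ssl/etcd-key.pem member list -w table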
Deploy the container runtime
First modify the containerd configuration so that containerd can access the self-hosted Harbor, adding the private registry settings:
vi /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."harbor.luohw.net"]
endpoint = ["https://harbor.luohw.net"]
[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.luohw.net".tls]
insecure_skip_verify = true
[plugins."io.containerd.grpc.v1.cri".registry.configs."harbor.luohw.net".auth]
username = "admin"
password = "123456"
systemctl restart containerd
Add the same settings to the kubeasz template so that nodes added later also get this configuration.
Add them at around line 157:
root@ubuntu20:/etc/kubeasz# pwd
/etc/kubeasz
root@ubuntu20:/etc/kubeasz# vim roles/containerd/templates/config.toml.j2
./ezctl setup k8s-cluster1 03
Problem encountered:
root@ubuntu20:~# crictl pull harbor.luohw.net/baseimage/paus:3.7
E0402 14:44:08.277290 558467 remote_image.go:238] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"harbor.luohw.net/baseimage/paus:3.7\": failed to resolve reference \"harbor.luohw.net/baseimage/paus:3.7\": failed to do request: Head \"https://harbor.luohw.net/v2/baseimage/paus/manifests/3.7\": x509: certificate signed by unknown authority" image="harbor.luohw.net/baseimage/paus:3.7"
FATA[0000] pulling image: rpc error: code = Unknown desc = failed to pull and unpack image "harbor.luohw.net/baseimage/paus:3.7": failed to resolve reference "harbor.luohw.net/baseimage/paus:3.7": failed to do request: Head "https://harbor.luohw.net/v2/baseimage/paus/manifests/3.7": x509: certificate signed by unknown authority
Make sure /etc/containerd/config.toml is correct (and note the image name typo above: the image is pause, not paus).
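After fixing config.toml and restarting containerd, pulling the image with its correct name should succeed:
crictl pull harbor.luohw.net/baseimage/pause:3.7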
Deploy the master nodes:
./ezctl setup k8s-cluster1 04
Deploy the worker nodes:
./ezctl setup k8s-cluster1 05
Deploy the network plugin (calico)
- On the management node, re-tag the calico images and push them to the registry (a re-tag sketch follows the push commands):
docker push harbor.luohw.net/baseimage/calico-pod2daemon-flexvol:v3.19.4
docker push harbor.luohw.net/baseimage/calico-cni:v3.19.4
docker push harbor.luohw.net/baseimage/calico-kube-controllers:v3.19.4
docker push harbor.luohw.net/baseimage/calico-node:v3.19.4
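The pushes above assume the images were already re-tagged from the locally downloaded copies; a sketch, assuming the upstream calico/* image names:
docker tag calico/cni:v3.19.4 harbor.luohw.net/baseimage/calico-cni:v3.19.4
docker tag calico/pod2daemon-flexvol:v3.19.4 harbor.luohw.net/baseimage/calico-pod2daemon-flexvol:v3.19.4
docker tag calico/node:v3.19.4 harbor.luohw.net/baseimage/calico-node:v3.19.4
docker tag calico/kube-controllers:v3.19.4 harbor.luohw.net/baseimage/calico-kube-controllers:v3.19.4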
Then point the calico manifest template at the private registry:
root@ubuntu20:~# cat /etc/kubeasz/roles/calico/templates/calico-v3.19.yaml.j2 |grep -n image
213: image: harbor.luohw.net/baseimage/calico-cni:{{ calico_ver }}
257: image: harbor.luohw.net/baseimage/calico-pod2daemon-flexvol:{{ calico_ver }}
268: image: harbor.luohw.net/baseimage/calico-node:{{ calico_ver }}
517: image: harbor.luohw.net/baseimage/calico-kube-controllers:{{ calico_ver }}
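One way to make that change, assuming the stock template references the local registry prefix easzlab.io.local:5000/calico/ (verify against your template before running):
sed -i 's#easzlab.io.local:5000/calico/#harbor.luohw.net/baseimage/calico-#g' \
  /etc/kubeasz/roles/calico/templates/calico-v3.19.yaml.j2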
./ezctl setup k8s-cluster1 06
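After step 06 the nodes should become Ready; a quick check (calicoctl is shipped by kubeasz, path may vary):
kubectl get node -o wide
kubectl get pod -n kube-system -o wide    # calico-node pods should be Running
calicoctl node status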
kubectl command completion (already appended to .bashrc by kubeasz):
cat .bashrc
source <(kubectl completion bash) # generated by kubeasz
Deploy CoreDNS
wget https://raw.githubusercontent.com/coredns/deployment/master/kubernetes/coredns.yaml.sed
docker tag coredns/coredns:1.9.4 harbor.luohw.net/baseimage/coredns:1.9.4
root@k8s-harbor:/apps/harbor# docker push harbor.luohw.net/baseimage/coredns:1.9.4
root@k8s-harbor:/apps/harbor# docker tag coredns/coredns:1.9.4 harbor.luohw.net/baseimages/coredns:1.9.4
root@k8s-harbor:/apps/harbor# docker push harbor.luohw.net/baseimages/coredns:1.9.4
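coredns.yaml.sed contains placeholders that must be substituted before use (placeholder names as in the upstream deployment repo; the values here follow this cluster's settings). A rough sketch; the upstream deploy.sh does this more completely, and the image also has to be changed to the private registry:
sed -e 's#CLUSTER_DOMAIN#cluster.local#g' \
    -e 's#REVERSE_CIDRS#in-addr.arpa ip6.arpa#g' \
    -e 's#UPSTREAMNAMESERVER#/etc/resolv.conf#g' \
    -e 's#CLUSTER_DNS_IP#10.100.0.2#g' \
    -e '/STUBDOMAINS/d' \
    coredns.yaml.sed > coredns.yaml
The manifest below is the version already adapted for this cluster: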
# __MACHINE_GENERATED_WARNING__
apiVersion: v1
kind: ServiceAccount
metadata:
name: coredns
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
kubernetes.io/bootstrapping: rbac-defaults
addonmanager.kubernetes.io/mode: Reconcile
name: system:coredns
rules:
- apiGroups:
- ""
resources:
- endpoints
- services
- pods
- namespaces
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- apiGroups:
- discovery.k8s.io
resources:
- endpointslices
verbs:
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations:
rbac.authorization.kubernetes.io/autoupdate: "true"
labels:
kubernetes.io/bootstrapping: rbac-defaults
addonmanager.kubernetes.io/mode: EnsureExists
name: system:coredns
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:coredns
subjects:
- kind: ServiceAccount
name: coredns
namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
labels:
addonmanager.kubernetes.io/mode: EnsureExists
data:
Corefile: |
.:53 {
errors
health {
lameduck 5s
}
log {
}
ready
kubernetes cluster.local in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
ttl 30
}
prometheus :9153
forward . /etc/resolv.conf {
max_concurrent 1000
}
cache 30
loop
reload
loadbalance
}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: coredns
namespace: kube-system
labels:
k8s-app: kube-dns
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
kubernetes.io/name: "CoreDNS"
spec:
# replicas: not specified here:
# 1. In order to make Addon Manager do not reconcile this replicas parameter.
# 2. Default is 1.
# 3. Will be tuned in real time if DNS horizontal auto-scaling is turned on.
strategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
selector:
matchLabels:
k8s-app: kube-dns
template:
metadata:
labels:
k8s-app: kube-dns
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
priorityClassName: system-cluster-critical
serviceAccountName: coredns
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
podAffinityTerm:
labelSelector:
matchExpressions:
- key: k8s-app
operator: In
values: ["kube-dns"]
topologyKey: kubernetes.io/hostname
tolerations:
- key: "CriticalAddonsOnly"
operator: "Exists"
nodeSelector:
kubernetes.io/os: linux
containers:
- name: coredns
image: harbor.luohw.net/baseimages/coredns:1.9.3
imagePullPolicy: IfNotPresent
resources:
limits:
memory: 256Mi
cpu: 200m
requests:
cpu: 100m
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
- name: config-volume
mountPath: /etc/coredns
readOnly: true
ports:
- containerPort: 53
name: dns
protocol: UDP
- containerPort: 53
name: dns-tcp
protocol: TCP
- containerPort: 9153
name: metrics
protocol: TCP
livenessProbe:
httpGet:
path: /health
port: 8080
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 5
readinessProbe:
httpGet:
path: /ready
port: 8181
scheme: HTTP
securityContext:
allowPrivilegeEscalation: false
capabilities:
add:
- NET_BIND_SERVICE
drop:
- all
readOnlyRootFilesystem: true
dnsPolicy: Default
volumes:
- name: config-volume
configMap:
name: coredns
items:
- key: Corefile
path: Corefile
---
apiVersion: v1
kind: Service
metadata:
name: kube-dns
namespace: kube-system
annotations:
prometheus.io/port: "9153"
prometheus.io/scrape: "true"
labels:
k8s-app: kube-dns
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
kubernetes.io/name: "CoreDNS"
spec:
selector:
k8s-app: kube-dns
clusterIP: 10.100.0.2
ports:
- name: dns
port: 53
protocol: UDP
- name: dns-tcp
port: 53
protocol: TCP
- name: metrics
port: 9153
protocol: TCP
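Apply the manifest and verify DNS resolution; the test pod uses the busybox version already referenced in config.yml (busybox_ver 1.28.4):
kubectl apply -f coredns.yaml
kubectl get pod -n kube-system -l k8s-app=kube-dns -o wide
kubectl run dns-test --rm -it --image=busybox:1.28.4 --restart=Never -- nslookup kubernetes.default.svc.cluster.local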
2. Cluster maintenance
Add a master node: ./ezctl add-master k8s-cluster1 192.168.1.30
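The other day-to-day ezctl subcommands follow the same pattern (IPs are illustrative; see ./ezctl --help for the full list):
./ezctl add-node k8s-cluster1 192.168.1.208      # add a worker node
./ezctl del-node k8s-cluster1 192.168.1.208      # remove a worker node
./ezctl del-master k8s-cluster1 192.168.1.30     # remove a master node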
Cluster upgrade (to pick up bug fixes):
Download the new binary release from GitHub
Extract it
Upgrade the masters (a command sketch follows these steps):
On the worker nodes, comment out the master being upgraded in the kube-lb configuration
Reload the load balancer: systemctl reload kube-lb
Stop the kube-apiserver, kube-controller-manager, kube-scheduler and kube-proxy services on that master
Upgrade: replace the binaries on the master under /usr/local/bin:
kube-apiserver kube-proxy kubectl kube-controller-manager kube-scheduler kubelet
Uncomment the master again and reload kube-lb
systemctl reload kube-lb
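A rough command sketch of the master-upgrade steps above, assuming the standard kubernetes/server/bin layout of the extracted release tarball and kube-lb's config under /etc/kube-lb/conf/ (paths may differ; kubeasz can also automate this with ./ezctl upgrade k8s-cluster1):
# on each worker node: comment the master out of /etc/kube-lb/conf/kube-lb.conf, then
systemctl reload kube-lb
# on the master being upgraded (kubelet included because its binary is replaced as well):
systemctl stop kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
cp kubernetes/server/bin/{kube-apiserver,kube-controller-manager,kube-scheduler,kube-proxy,kubelet,kubectl} /usr/local/bin/
systemctl start kube-apiserver kube-controller-manager kube-scheduler kube-proxy kubelet
kube-apiserver --version    # verify
# uncomment the master on the workers again and reload kube-lb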
Upgrade the worker nodes (a command sketch follows these steps):
Evict the pods: kubectl drain 192.168.1.30 --ignore-daemonsets
After draining completes, stop kubelet and kube-proxy
Replace the binaries
Restart kubelet and kube-proxy
Verify the version
Re-enable scheduling:
kubectl uncordon 192.168.1.30
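A matching sketch for a worker node (same assumptions about the binary source path as above):
kubectl drain 192.168.1.30 --ignore-daemonsets
# on the node:
systemctl stop kubelet kube-proxy
cp kubernetes/server/bin/{kubelet,kube-proxy} /usr/local/bin/
systemctl start kubelet kube-proxy
kubelet --version
# back on the deploy/management host:
kubectl uncordon 192.168.1.30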