0417 K8s Notes
CoreDNS
Pods in Kubernetes resolve a Service's domain name and the traffic is then load-balanced to the pods behind that Service; without DNS resolution, the Service fronting each application cannot be found by name.
How it works:
CoreDNS forwards the request to kube-apiserver, which looks up the Service IP in etcd and returns it to CoreDNS, which in turn answers the requesting pod. For external domains, CoreDNS forwards the query to an upstream authoritative DNS server.
Deploying CoreDNS
Deployment via YAML manifest
1. Download the official images (including kube-apiserver and the other components, so they are available for later version upgrades)
2. Edit the YAML manifest
data:
  Corefile: |
    .:53 {
        errors          # log errors to standard output
        health {        # CoreDNS health report served at http://localhost:8080/health
            lameduck 5s
        }
        ready           # listens on port 8181; returns 200 once the CoreDNS plugins are ready
        kubernetes k8s.local in-addr.arpa ip6.arpa {   # use the cluster domain configured when deploying with kubeasz; service names follow NAME.NAMESPACE.svc.CLUSTER_DNS_DOMAIN, e.g. kube-dns.kube-system.svc.k8s.local
            pods insecure
            fallthrough in-addr.arpa ip6.arpa
            ttl 30
        }
        prometheus :9153   # CoreDNS metrics exposed in Prometheus key/value format at http://localhost:9153/metrics
        forward . 223.6.6.6 {   # upstream resolver; can be a public/authoritative DNS or an internal company DNS
            max_concurrent 1000
        }
        cache 30        # cache service lookups, in seconds
        loop            # detect resolution loops and terminate the CoreDNS process if one is found
        reload          # watch the Corefile for changes; after the ConfigMap is edited it is reloaded, by default within about two minutes
        loadbalance     # round-robin the answers when a name has multiple records
    }
containers:
- name: coredns
  image: coredns/coredns:latest   # image path; make sure it points to a registry the nodes can actually pull from
  imagePullPolicy: IfNotPresent
  resources:
    limits:
      memory: 200Mi   # memory limit
    requests:
      cpu: 100m
      memory: 70Mi
spec:
  selector:
    k8s-app: kube-dns
  clusterIP: 10.100.0.2   # confirm from inside a container with: cat /etc/resolv.conf
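To verify resolution end to end, run a throwaway pod and query a Service name through CoreDNS (a sketch; the busybox image and test pod name are just examples, and the cluster domain matches the k8s.local zone above):
kubectl run dns-test --image=busybox:1.28 --restart=Never --rm -it -- \
  nslookup kubernetes.default.svc.k8s.local
The pod's /etc/resolv.conf should list the clusterIP 10.100.0.2 as its nameserver.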
Making DNS highly available
1. Increase the replica count
kubectl get deployments -n kube-system
NAME READY UP-TO-DATE AVAILABLE AGE
calico-kube-controllers 1/1 1 1 2d2h
coredns 1/1 1 1 10m
kubectl edit deployments coredns -n kube-system   # changes take effect immediately after saving
generation: 2
2. Raise the resource limits (see the sketch below)
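A non-interactive sketch covering both steps (the replica count and the new memory limit are example values):
kubectl scale deployment coredns -n kube-system --replicas=2
kubectl -n kube-system patch deployment coredns --type=json \
  -p='[{"op":"replace","path":"/spec/template/spec/containers/0/resources/limits/memory","value":"300Mi"}]'
kubectl get pods -n kube-system -l k8s-app=kube-dns   # confirm the new replicas are Running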
Adding DNS caching
- Run dnsmasq inside the pod to add a cache, but that cache is not shared between pods
- Deploy a node-local DNS cache on each node; when deploying the cluster with kubeasz this is the option ENABLE_LOCAL_DNS_CACHE: true
- Increase the CoreDNS cache TTL, e.g. 60s or 180s, at the cost of more resources (see the sketch after this list)
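A minimal sketch of the corresponding Corefile change (same zone block as above; 60 is an example value):
cache 60   # raise the answer cache from 30s to 60s
Larger TTLs reduce load on CoreDNS and the upstream resolvers, but stale records live longer, so the value is a trade-off.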
Dashboard
Create the Dashboard from the official YAML manifest
kubectl apply -f dashboardv1.yaml
Expose a port so the Dashboard can be reached from outside the cluster
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort
  ports:
  - port: 443
    targetPort: 8443
    nodePort: 30004   # expose NodePort 30004
  selector:
    k8s-app: kubernetes-dashboard
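After applying the change, confirm the NodePort and open https://<node-ip>:30004 in a browser (a sketch):
kubectl get svc kubernetes-dashboard -n kubernetes-dashboard   # TYPE should be NodePort, PORT(S) 443:30004/TCP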
Generate a token for login
The Kubernetes Dashboard currently only supports logging in with a Bearer Token. Because the default Dashboard deployment is configured with minimal RBAC permissions, we create a ServiceAccount named admin and a ClusterRoleBinding that binds it to the cluster's built-in cluster-admin ClusterRole.
[root@k8s-master1 dashboard]# cat adminuser.yaml
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: admin
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
  name: admin
  namespace: kubernetes-dashboard
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin
  namespace: kubernetes-dashboard
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
[root@k8s-master1 dashboard]# kubectl get secrets -A | grep admin
kubernetes-dashboard admin-token-fsfb2 kubernetes.io/service-account-token 3 93s
[root@k8s-master1 dashboard]# kubectl describe secrets admin-token-fsfb2 -n kubernetes-dashboard
Name: admin-token-fsfb2
Namespace: kubernetes-dashboard
Labels: <none>
Annotations: kubernetes.io/service-account.name: admin
kubernetes.io/service-account.uid: aafaacdc-e3ed-4246-b8e9-d76de4be2c1d
Type: kubernetes.io/service-account-token
Data
====
ca.crt: 1302 bytes
namespace: 20 bytes
token: eyJhbGciOiJSUzI1NiIsImtpZCI6IlJjVXRjX0FEemIxb0Y0OFIyOU03OFEyNkxNbUJNOV9JS25JNmtXbFBKeWsifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlcm5ldGVzLWRhc2hib2FyZCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJhZG1pbi10b2tlbi1mc2ZiMiIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50Lm5hbWUiOiJhZG1pbiIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50LnVpZCI6ImFhZmFhY2RjLWUzZWQtNDI0Ni1iOGU5LWQ3NmRlNGJlMmMxZCIsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDprdWJlcm5ldGVzLWRhc2hib2FyZDphZG1pbiJ9.KTsRFF2WW8tC4J34RV-w2JJdWSYnwfrJwZAvg2JAN3In-2bk5Y_P41y6M-KlxdR05N5oEZPLu3lyFOvvZdVXaS94U0FBBGmk0IWT89J7Dlw3m3Gt5YZDTW7EdulQl_BAc_OUhYV_yJwKx6CdcAEhfrneLSCm0QX4P1L1ahuaUfAkPLnfyI7taUiJFpGL6roOhEob_zASQ6VrjvyCCONVzmeJPBF-xs2Dnf1kd4GcMOuy1sjgEaCn3JPNnAx-NsTfpgQRMn5U4z8lLzkm5KShVBwNTLTs9U8ynhnb8yVgtxE_AbCL6Jj4I4oVZTjvzPdw2HDSY0AlJhVWU__b364r6Q
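To grab only the token instead of reading the full describe output, a one-liner such as this works on 1.23 (a sketch; it assumes the admin ServiceAccount created above):
kubectl -n kubernetes-dashboard get secret \
  $(kubectl -n kubernetes-dashboard get sa admin -o jsonpath='{.secrets[0].name}') \
  -o jsonpath='{.data.token}' | base64 -d
Paste the decoded token into the Dashboard login page served on the NodePort exposed above (30004).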
Common kubectl commands
kubectl get svc -A   # list Services in all namespaces; not recommended, similar to SELECT * in SQL
kubectl get pod -n default   # query pods in a specific namespace; the recommended form
kubectl get deployment -n kube-system   # list controllers (Deployments)
kubectl describe nodes   # show node details, including running pods and resource allocation
cpu   100m (5%)   100m (5%)   # 0.1 of a CPU core has been allocated; 1000m = 1 core
Exec into a pod
kubectl exec -it POD_NAME -n NAMESPACE -c CONTAINER_NAME -- /bin/sh
Delete a pod
kubectl delete pods/POD_NAME -n NAMESPACE
Edit a resource in place
kubectl edit RESOURCE -n NAMESPACE   # some fields cannot be modified, e.g. the name
Create a pod
kubectl create -f FILE.yaml   # fails if the resource already exists; kubectl apply can be run repeatedly and also applies modifications
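A quick illustration of the difference (a sketch; tomcat.yaml is the manifest used later in these notes):
kubectl create -f tomcat.yaml   # a second run fails with AlreadyExists
kubectl apply -f tomcat.yaml    # idempotent: creates on the first run, updates on later runs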
Use jq to inspect JSON output
yum install epel-release
yum install jq   # install jq
kubectl get --raw /api/v1/namespaces/ | jq .kind
View node information
kubectl get nodes/k8s-node1 -o yaml   # or -o wide
Get node heartbeat status
kubectl get leases -n kube-node-lease   # the traditional health check reported the full node object and consumed a lot of network bandwidth; node leases live in the kube-node-lease namespace and carry only the heartbeat. The kubelet refreshes the status every 10s but does not report it; a full report is only sent when something changes or after the default 5-minute interval
kubectl get leases/k8s-node1 -o yaml -n kube-node-lease
View pod logs
[root@k8s-master1 ~]# kubectl logs -f --tail 1 dashboard-metrics-scraper-8464848978-p26vh -n kubernetes-dashboard
192.168.226.148 - - [20/Apr/2022:13:29:45 +0000] "GET / HTTP/1.1" 200 6 "" "kube-probe/1.23"
192.168.226.148 - - [20/Apr/2022:13:29:55 +0000] "GET / HTTP/1.1" 200 6 "" "kube-probe/1.23"
Introduction to etcd
etcd is currently the default key-value data store used by Kubernetes and holds all of the cluster's data. etcd supports distributed clustering, and in production the etcd data must be backed up on a regular schedule. Fast disks are the single most important factor for etcd performance and stability, so use SSDs for etcd storage whenever possible.
etcd characteristics
- Fully replicated: every node in the cluster has the complete data set
- Highly available: etcd is designed to avoid single points of failure caused by hardware or network problems
- Consistent: every read returns the latest write across all hosts
- Simple: a well-defined, user-facing API
- Secure: automated TLS with optional client-certificate authentication
- Fast: benchmarked at 10,000 writes per second
- Reliable: uses the Raft algorithm to keep the stored data consistently distributed
- etcd uses a WAL (write-ahead log): the log is written before the data, so if the log write did not complete the data write is not considered complete, and the log can later be used to recover data (see the sketch after this list)
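The WAL and snapshot files can be inspected directly under the data directory (a sketch; the path assumes the --data-dir=/var/lib/etcd used below):
ls /var/lib/etcd/member/wal    # write-ahead log segments
ls /var/lib/etcd/member/snap   # periodic snapshots of the keyspace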
etcd configuration explained
[root@k8s-master1 ~]# cat /etc/systemd/system/etcd.service
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
Documentation=https://github.com/coreos
[Service]
Type=notify
WorkingDirectory=/var/lib/etcd #data directory
ExecStart=/usr/local/bin/etcd \ #path to the etcd binary
--name=etcd-192.168.226.144 \ #name of this node; must be unique within the cluster
--cert-file=/etc/kubernetes/ssl/etcd.pem \
--key-file=/etc/kubernetes/ssl/etcd-key.pem \
--peer-cert-file=/etc/kubernetes/ssl/etcd.pem \
--peer-key-file=/etc/kubernetes/ssl/etcd-key.pem \
--trusted-ca-file=/etc/kubernetes/ssl/ca.pem \
--peer-trusted-ca-file=/etc/kubernetes/ssl/ca.pem \ #CA certificate
--initial-advertise-peer-urls=https://192.168.226.144:2380 \ #peer URL advertised to the cluster at initialization
--listen-peer-urls=https://192.168.226.144:2380 \ #peer listen URL
--listen-client-urls=https://192.168.226.144:2379,http://127.0.0.1:2379 \
--advertise-client-urls=https://192.168.226.144:2379 \ #client URL used by clients such as kube-apiserver
--initial-cluster-token=etcd-cluster-0 \ #token used when bootstrapping the cluster; must be identical across the cluster
--initial-cluster=etcd-192.168.226.144=https://192.168.226.144:2380 \ #all members of the cluster
--initial-cluster-state=new \
--data-dir=/var/lib/etcd \ #data directory
--wal-dir= \
--snapshot-count=50000 \ #number of committed transactions that triggers a snapshot to disk
--auto-compaction-retention=10h \ #retain 10h of history; in periodic mode compaction then runs about every 10h * 10% = 1h
--auto-compaction-mode=periodic \ #periodic compaction
--max-request-bytes=10485760 \ #maximum request size in bytes; the default is about 1.5 MB per key, 10 MB is the recommended maximum
--quota-backend-bytes=8589934592 #backend storage quota; the default is 2 GB, and values above 8 GB trigger a warning at startup
Restart=always
RestartSec=15
LimitNOFILE=65536
OOMScoreAdjust=-999
[Install]
WantedBy=multi-user.target
Check etcd endpoint health
etcdctl endpoint health
List etcd members
etcdctl member list
Check the status of all etcd nodes in a loop
export NODE_IPS="192.168.226.144"
for ip in ${NODE_IPS}; do ETCDCTL_API=3 /usr/local/bin/etcdctl --write-out=table endpoint status --endpoints=https://${ip}:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/kubernetes/ssl/etcd.pem --key=/etc/kubernetes/ssl/etcd-key.pem; done
+------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.168.226.144:2379 | b1689af5c25ff092 | 3.5.1 | 3.3 MB | true | false | 5 | 51499 | 51499 | |
+------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
List all keys stored in etcd
etcdctl get / --prefix --keys-only
Delete a pod directly through etcd
[root@k8s-master1 ~]# etcdctl get / --prefix --keys-only | grep test1
/calico/resources/v3/projectcalico.org/workloadendpoints/default/node2-k8s-net--test1-eth0
/registry/pods/default/net-test1
[root@k8s-master1 ~]# etcdctl del /registry/pods/default/net-test1   # delete directly, bypassing the apiserver
1
etcd's watch mechanism
etcd continuously watches keys and proactively notifies clients when the data changes, for example:
[root@k8s-master1 ~]# ETCDCTL_API=3 /usr/local/bin/etcdctl watch /data
PUT
/data
1111
Backing up and restoring etcd data with etcdctl
etcdctl snapshot save /data/etcdbackup/etcd-backup-20220420   # back up the data
etcdctl snapshot restore /data/etcdbackup/etcd-backup-20220420 --data-dir=/data/restore-etcd   # restore into a new data directory
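On a TLS-enabled cluster such as this one, the save command also needs the endpoint and certificate flags (a sketch reusing the certificate paths from the unit file above; the backup path is just an example):
ETCDCTL_API=3 etcdctl snapshot save /data/etcdbackup/etcd-backup-$(date +%Y%m%d) \
  --endpoints=https://192.168.226.144:2379 \
  --cacert=/etc/kubernetes/ssl/ca.pem \
  --cert=/etc/kubernetes/ssl/etcd.pem \
  --key=/etc/kubernetes/ssl/etcd-key.pem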
Change the etcd data directory and restart (both settings live in /etc/systemd/system/etcd.service):
WorkingDirectory=/var/lib/etcd
--data-dir=/var/lib/etcd
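After editing the unit file, reload systemd and restart the service, then verify (a sketch):
systemctl daemon-reload
systemctl restart etcd
etcdctl endpoint health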
Backing up and restoring cluster etcd data with kubeasz
The kube-apiserver is unavailable while the data is being restored.
./ezctl backup k8s-cluster1 #back up the specified cluster's data
[root@k8s-master1 kubeasz]# kubectl get pods
NAME READY STATUS RESTARTS AGE
net-test1 1/1 Running 0 12m
net-test2 1/1 Running 3 (173m ago) 3d4h
[root@k8s-master1 kubeasz]# kubectl delete pod net-test1
pod "net-test1" deleted
[root@k8s-master1 kubeasz]# kubectl get pods #delete a pod to verify backup and restore
NAME READY STATUS RESTARTS AGE
net-test2 1/1 Running 3 (174m ago) 3d4h
./ezctl restore k8s-cluster1 #restores from clusters/k8s-cluster1/backup/snapshot.db
[root@k8s-master1 kubeasz]# kubectl get pods
NAME READY STATUS RESTARTS AGE
net-test1 0/1 ContainerCreating 1 18m
net-test2 1/1 Running 3 (179m ago) 3d4h
etcd data recovery procedure
When more than half of the etcd members are down (e.g. two out of three), the whole cluster is down and the data has to be restored. The recovery procedure is as follows (a command sketch follows the list):
- Restore the server operating systems
- Redeploy the etcd cluster
- Stop kube-apiserver / controller-manager / scheduler / kubelet / kube-proxy
- Stop the etcd cluster
- Restore the same backup snapshot on every etcd node
- Start all etcd nodes and verify the cluster
- Start kube-apiserver / controller-manager / scheduler / kubelet / kube-proxy
- Verify the Kubernetes master status and pod data
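A minimal sketch of the restore step on one member (assumes the snapshot was copied to /data/etcdbackup/snapshot.db and reuses the name, peer URL, and data dir from the unit file above; each member restores with its own values before etcd is started again):
systemctl stop etcd
rm -rf /var/lib/etcd   # discard the old, inconsistent data
ETCDCTL_API=3 etcdctl snapshot restore /data/etcdbackup/snapshot.db \
  --name=etcd-192.168.226.144 \
  --initial-cluster=etcd-192.168.226.144=https://192.168.226.144:2380 \
  --initial-advertise-peer-urls=https://192.168.226.144:2380 \
  --data-dir=/var/lib/etcd
systemctl start etcd
etcdctl endpoint health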
Kubernetes version upgrade
Master upgrade
A master upgrade involves these components: kube-apiserver, kube-controller-manager, kubectl, kubelet, kube-proxy, and kube-scheduler.
Drain traffic from the master being upgraded
vim /etc/kube-lb/conf/kube-lb.conf
stream {
    upstream backend {
        #server 192.168.226.144:6443 max_fails=2 fail_timeout=3s;   # comment out the node being upgraded
    }
}
systemctl restart kube-lb.service #restart the load balancer
#Stop the processes and copy the new binaries into place
cp kube-apiserver kube-controller-manager kubectl kubelet kube-proxy kube-scheduler /etc/kubeasz/bin/ #also copy into the kubeasz bin directory so later cluster scale-outs use the new version
systemctl stop kube-apiserver.service kube-controller-manager.service kube-scheduler.service kube-proxy.service kubelet.service #the processes must be stopped before the binaries can be overwritten
cp kube-apiserver kube-controller-manager kubectl kubelet kube-proxy kube-scheduler /usr/local/bin/
systemctl start kube-apiserver.service kube-controller-manager.service kube-scheduler.service kube-proxy.service kubelet.service
Verify that the upgrade succeeded
[root@k8s-master1 bin]# calicoctl node status
Calico process is running.
IPv4 BGP status
+-----------------+-------------------+-------+----------+-------------+
| PEER ADDRESS | PEER TYPE | STATE | SINCE | INFO |
+-----------------+-------------------+-------+----------+-------------+
| 192.168.226.145 | node-to-node mesh | up | 13:35:07 | Established |
| 192.168.226.146 | node-to-node mesh | up | 13:35:13 | Established |
+-----------------+-------------------+-------+----------+-------------+
IPv6 BGP status
No IPv6 peers found.
[root@k8s-master1 bin]# kubectl get pods
NAME READY STATUS RESTARTS AGE
net-test1 1/1 Running 1 (44m ago) 22h
net-test2 1/1 Running 4 (45m ago) 4d2h
[root@k8s-master1 bin]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
192.168.226.144 Ready,SchedulingDisabled master 4d3h v1.23.5
192.168.226.145 Ready node 4d2h v1.23.1
192.168.226.146 Ready node 4d2h v1.23.1
[root@k8s-master1 bin]# kube-apiserver --version
Kubernetes v1.23.5
Node upgrade
Because pods are running on the node, they must be evicted to other nodes before the upgrade. Alternatively, the node can be deleted and Kubernetes will recreate its pods on other nodes.
Evict the pods
kubectl drain 192.168.226.145 --force --ignore-daemonsets --delete-emptydir-data #--force: evict even unmanaged pods; --ignore-daemonsets: skip DaemonSet-managed pods; --delete-emptydir-data: delete pods that use local emptyDir storage
Stop the services
systemctl stop kubelet.service kube-proxy.service
Upgrade the binaries
scp kubelet kube-proxy 192.168.226.145:/usr/local/bin/
systemctl start kubelet.service kube-proxy.service
Re-enable scheduling on the node
[root@k8s-master1 ~]# kubectl uncordon 192.168.226.145
node/192.168.226.145 uncordoned
[root@k8s-master1 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
192.168.226.144 Ready,SchedulingDisabled master 4d3h v1.23.5
192.168.226.145 Ready node 4d3h v1.23.5
192.168.226.146 Ready node 4d3h v1.23.1
Scaling the cluster
./ezctl
add-etcd <cluster> <ip> to add a etcd-node to the etcd cluster
add-master <cluster> <ip> to add a master node to the k8s cluster
add-node <cluster> <ip> to add a work node to the k8s cluster
del-etcd <cluster> <ip> to delete a etcd-node from the etcd cluster
del-master <cluster> <ip> to delete a master node from the k8s cluster
del-node <cluster> <ip> to delete a work node from the k8s cluster
#Passwordless SSH (key-based) authentication to the new node must be set up beforehand
The CA certificate is not re-issued when the cluster is scaled out.
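For example, adding a worker node might look like this (a sketch; the IP 192.168.226.147 is hypothetical):
ssh-copy-id 192.168.226.147                     # passwordless SSH to the new node
./ezctl add-node k8s-cluster1 192.168.226.147   # join it to the cluster as a worker
kubectl get nodes                               # confirm the new node registers and becomes Ready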
YAML manifest configuration
Create a basic Tomcat Deployment
[root@k8s-master1 ~]# cat tomcat.yaml
kind: Deployment #resource type: Deployment, Pod, Service, etc.
#apiVersion: extensions/v1beta1
apiVersion: apps/v1 #API version for the type; list with kubectl api-resources / kubectl api-versions; older versions used extensions/v1beta1
metadata: #metadata used to define the attributes of this resource object
  labels: #custom labels
    app: linux66-tomcat-app1-deployment-label #label key app with this value, used later for filtering; consumed by the Deployment
  name: linux66-tomcat-app1-deployment #Deployment name, must be unique
  namespace: linux66 #namespace it belongs to; defaults to default
spec: #detailed definition of the containers in the Deployment
  replicas: 1 #number of replicas, default 1
  selector: #selects pods by label: key app, value linux66-tomcat-app1-selector; the controller maintains the replica count, creating a pod if none match and deleting one if two match, keeping the count at 1
    matchLabels: #labels to match, required
      app: linux66-tomcat-app1-selector #the label to match
  template: #pod template, required; it describes the pods to be created
    metadata:
      labels:
        app: linux66-tomcat-app1-selector #every pod created from this template inherits this label
    spec:
      containers: #list of containers in the pod, at least one and possibly several; containers cannot be added to or removed from a running pod
      - name: linux66-tomcat-app1-container #container name
        image: tomcat:7.0.94-alpine
        #command: ["/apps/tomcat/bin/run_tomcat.sh"]
        #imagePullPolicy: IfNotPresent
        imagePullPolicy: Always #pull policy: IfNotPresent, Always, or Never
        ports: #declarative only; does not by itself expose anything
        - containerPort: 8080
          protocol: TCP #TCP, UDP, or SCTP
          name: http
        env: #container environment variables, the equivalent of env on a VM; used to pass e.g. database passwords and IPs
        - name: "password"
          value: "123456"
        - name: "age"
          value: "18"
        resources: #resource limits: at most 1 CPU core and 150Mi of memory
          limits:
            cpu: 1
            memory: "150Mi"
          requests:
            cpu: 300m
            memory: "100Mi"
---
kind: Service #exposes the containerized service, including to clients outside Kubernetes; kubectl get ep -A shows endpoint IPs when the Service matches pods
apiVersion: v1
metadata:
  labels:
    app: linux66-tomcat-app1-service-label
  name: linux66-tomcat-app1-service
  namespace: linux66 #must be in the same namespace as the pods
spec:
  #type: NodePort #NodePort, ClusterIP (default), LoadBalancer, ExternalName
  ports: #the actual port forwarding; if nodePort is omitted one is allocated at random from the cluster's NodePort range
  - name: http
    port: 80 #port the Service listens on
    protocol: TCP
    targetPort: 8080 #port the container serves on
    #nodePort: 40003 #host port; request path: nodePort -> port -> targetPort -> service process
  selector:
    app: linux66-tomcat-app1-selector #routes traffic to the selected pods; must equal Deployment.spec.selector.matchLabels
Verify
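A sketch of the verification steps (assumes the linux66 namespace already exists; create it first with kubectl create ns linux66 if it does not):
kubectl apply -f tomcat.yaml
kubectl get deployment,pods -n linux66 -o wide   # the pod should reach Running
kubectl get svc,ep -n linux66                    # the endpoints list the pod IP once the selector matches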