Copyright notice: feel free to reproduce this article, but when doing so please clearly mark the original source and author information as a hyperlink, together with this copyright notice (Author: Zhang Hua, published on 2018-07-13)

Bootstrapping master with kubeadm

https://kubernetes.io/docs/setup/independent/install-kubeadm/
Create VMs (ubuntu 20.04) such as juju-f5e447-kubeadm-0.cloud.sts by
juju add-machine --constraints "mem=4G root-disk=40G" --series focal -n2

# Reset env
kubectl drain juju-f5e447-kubeadm-0.cloud.sts --delete-emptydir-data --force --ignore-daemonsets
kubectl get node
kubectl delete node juju-f5e447-kubeadm-0.cloud.sts
sudo kubeadm reset

# Installing kubeadm, kubelet, kubectl, docker
sudo apt update && sudo apt install -y apt-transport-https curl
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
# 'kubernetes-xenial' is also used for focal
sudo bash -c 'cat >/etc/apt/sources.list.d/kubernetes.list' <<EOF
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
sudo apt update
sudo apt-cache madison kubeadm |grep 1.20.11-00
sudo apt install -y kubelet=1.20.11-00 kubeadm=1.20.11-00 kubectl=1.20.11-00
sudo apt-mark hold kubelet kubeadm kubectl  #prevent upgrading
sudo cat /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
sudo apt install -y docker.io
sudo systemctl enable docker.service

# Creating a single master cluster with kubeadm
# For flannel to work correctly, you must pass --pod-network-cidr=10.244.0.0/16
https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
# sudo kubeadm reset
# need KUBELET_EXTRA_ARGS="--fail-swap-on=false" if using '--ignore-preflight-errors=swap'
sudo kubeadm init  --pod-network-cidr=10.244.0.0/16 --v=5
#sudo kubeadm init --kubernetes-version=1.20.11 --pod-network-cidr=10.244.0.0/16 --service-cidr=10.96.0.0/12 --ignore-preflight-errors=swap --v=5
kubectl describe ds kube-flannel-ds-amd64 --namespace kube-system
cat /etc/cni/net.d/10-flannel.conflist 

# the following commands come from the output of 'kubeadm init'
# export KUBECONFIG=/etc/kubernetes/admin.conf
mkdir -p $HOME/.kube
rm -rf ~/.kube/config && sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
kubectl cluster-info

# configure kubectl completion
kubectl completion bash | sudo tee /etc/bash_completion.d/k8s

# Installing a pod network add-on
sudo bash -c 'cat >> /etc/sysctl.conf' << EOF
net.bridge.bridge-nf-call-iptables=1
EOF
sudo sysctl -p
sysctl net.bridge.bridge-nf-call-iptables=1
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
kubectl get daemonsets --all-namespaces
kubectl get nodes -o wide

# Joining your nodes the kubeadm way; we will also try the TLS bootstrap way below
ssh cloud_user@joshuazhang3c.mylabserver.com -v
sudo -i
apt install -y docker.io
systemctl enable docker.service
apt update && apt install -y apt-transport-https curl
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
cat <<EOF >/etc/apt/sources.list.d/kubernetes.list
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
apt update
apt install -y kubeadm

# kubeadm token list
# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
# kubectl get secrets --namespace=kube-system bootstrap-token-9k9xoo -o jsonpath='{.data.token-id}' |base64 -d
# kubectl get secrets --namespace=kube-system bootstrap-token-9k9xoo -o jsonpath='{.data.token-secret}' |base64 -d
kubeadm join 172.31.19.84:6443 --token 0c4fdy.xptpmgh4eqihxh66 --discovery-token-ca-cert-hash sha256:b227cfd35c9d1ad42d8692576c0a453271741f59e5052c98674bc075b0789a17
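
If the token above has expired, the master can print a fresh token together with the full join command:

kubeadm token create --print-join-command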

Update 2021-11-03: the steps above are outdated and will produce the following error:

Nov 02 10:35:19 juju-f5e447-kubeadm-0 kubelet[893605]: E1102 10:35:19.571208  893605 server.go:294] "Failed to run kubelet" err="failed to run Kubelet: misconfiguration: kubelet cgroup driver: \"systemd\" is different from docker cgroup driver: \"cgroupfs\""

That is because docker should use cgroupDriver=cgroupfs rather than the default systemd. Also, deployment via config.yaml is now used (the config is stored in a ConfigMap, which makes upgrading kubelet easier). Modify it as follows:

kubeadm config print init-defaults --component-configs KubeletConfiguration > config.yaml
# grep -r 'cgroupDriver' config.yaml 
cgroupDriver: cgroupfs
# grep -i 'subnet' config.yaml -B2
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 10.244.0.0/16
kubeadm init --config config.yaml
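
For a single-node test cluster, the control-plane taint usually has to be removed before ordinary pods can be scheduled; a hedged example (the taint key below is the pre-1.24 default):

kubectl taint nodes --all node-role.kubernetes.io/master-
kubectl get nodes -o wide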

The following switches to containerd; when using containerd you must use cgroupDriver=systemd.

sudo modprobe br_netfilter
# Setup required sysctl params, these persist across reboots.
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sudo sysctl --system
sudo apt-get remove docker docker-engine docker.io containerd runc
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
sudo apt-get update
sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release -y
sudo apt-cache madison containerd.io
sudo apt-get install containerd.io -y
containerd config default | sudo tee /etc/containerd/config.toml
# grep -r 'SystemdCgroup' /etc/containerd/config.toml -B1
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
            SystemdCgroup = true
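
If the generated config still shows SystemdCgroup = false, one way to flip it in place (a sed sketch assuming the default /etc/containerd/config.toml layout) before restarting containerd:

sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml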
sudo systemctl restart containerd
#sudo curl -fsSLo /usr/share/keyrings/kubernetes-archive-keyring.gpg https://packages.cloud.google.com/apt/doc/apt-key.gpg
#echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main" |sudo tee /etc/apt/sources.list.d/kubernetes.list
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo bash -c 'cat >/etc/apt/sources.list.d/kubernetes.list' <<EOF
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
sudo apt update
sudo mkdir -p /etc/systemd/system/kubelet.service.d/
cat << EOF | sudo tee /etc/systemd/system/kubelet.service.d/0-containerd.conf
[Service]
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
EOF
sudo systemctl daemon-reload
sudo apt-get install -y kubelet=1.20.11-00 kubeadm=1.20.11-00
cat << EOF | tee kubeadm-config.yaml
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
kubernetesVersion: v1.20.11
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
imageRepository: k8s.gcr.io
networking:
  dnsDomain: cluster.local
  podSubnet: 192.168.0.0/16
  serviceSubnet: 10.96.0.0/12
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: systemd
EOF
sudo kubeadm init --cri-socket /run/containerd/containerd.sock --config kubeadm-config.yaml

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
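
A pod network add-on is still needed before the node turns Ready. The podSubnet 192.168.0.0/16 above matches Calico's default, so a hedged example is (manifest URL as published at the time of writing; use the flannel manifest from the earlier section instead if you keep 10.244.0.0/16):

kubectl apply -f https://docs.projectcalico.org/manifests/calico.yaml
kubectl get pods -n kube-system -w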

The CRI interface developed for k8s unifies the calls to containerd and docker, so k8s also has the crictl command, which makes it more convenient to operate on containers and Pods.

On the compute nodes, repeat the containerd and kubeadm installation above, but skip the three $HOME/.kube/config commands at the end; then run 'kubeadm join' (this command is actually printed in the output of 'kubeadm init'):

sudo kubeadm join 10.5.3.198:6443 --token 0pn9jv.w23lpgezwy6hortd \
    --discovery-token-ca-cert-hash sha256:68af6f0304fc38ae569650fc50896723eb4dfbbc8332cc09a1c343c2dcf8a1fe

Bootstrapping worker with TLS bootstrapping

https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
Just copy two files (/var/lib/kubelet/config.yaml and ca.crt) to the new worker node, then use a bootstrap token (temporary auth) together with ca.crt to generate a kubeconfig, and finally restart kubelet with 'kubelet --bootstrap-kubeconfig="/etc/kubernetes/bootstrap-kubelet.conf" --kubeconfig="/etc/kubernetes/kubeconfig.conf" --config="/var/lib/kubelet/config.yaml"'. The master will then create the (non-temporary) certificate for the worker and auto-approve it.

a, Principle behind: kubeadm has created a bootstrap token with the auth-extra-groups 'system:bootstrappers:kubeadm:default-node-token' for us. You can change the group name 'system:bootstrappers:kubeadm:default-node-token' to another one, e.g. system:bootstrappers:myworkers

$ kubectl get --namespace=kube-system secrets bootstrap-token-9k9xoo -o jsonpath='{.data.auth-extra-groups}' |base64 -d
system:bootstrappers:kubeadm:default-node-token

sudo bash -c 'cat > bootstrap-token.yaml' <<EOF
# https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/
apiVersion: v1
kind: Secret
metadata:
  # Name MUST be of form "bootstrap-token-<token id>"
  name: bootstrap-token-07401b
  namespace: kube-system
# Type MUST be 'bootstrap.kubernetes.io/token'
type: bootstrap.kubernetes.io/token
stringData:
  # Human readable description. Optional.
  description: "The default bootstrap token generated by 'kubeadm init'."
  # Token ID and secret. Required.
  token-id: 07401b
  token-secret: f395accd246ae52d
  # Expiration. Optional.
  expiration: 2019-03-10T03:22:11Z
  # Allowed usages.
  usage-bootstrap-authentication: "true"
  usage-bootstrap-signing: "true"
  # Extra groups to authenticate the token as. Must start with "system:bootstrappers:"
  auth-extra-groups: system:bootstrappers:kubeadm:default-node-token
EOF
kubectl create -f bootstrap-token.yaml
kubectl describe secrets --namespace=kube-system bootstrap-token-07401b
kubectl get secrets --namespace=kube-system bootstrap-token-07401b -o jsonpath={.data.token-id} |base64 -d
kubectl get secrets --namespace=kube-system bootstrap-token-07401b -o jsonpath={.data.token-secret} |base64 -d

So the following 'kubeadm token create' will create a new token with the auth-extra-groups 'system:bootstrappers:kubeadm:default-node-token'.
$ kubeadm token create
iate9c.v9qhw2dyngxfcsig
TOKEN_ID=$(kubectl get secrets --namespace=kube-system bootstrap-token-iate9c -o jsonpath='{.data.token-id}' |base64 -d)
TOKEN_SECRET=$(kubectl get secrets --namespace=kube-system bootstrap-token-iate9c -o jsonpath='{.data.token-secret}' |base64 -d)

b, Principle behind: kubeadm has also created the following 3 clusterrolebindings to map the group 'system:bootstrappers:kubeadm:default-node-token' for us. If you are using the new group 'system:bootstrappers:myworkers', change it here to 'system:bootstrappers:myworkers' or 'system:bootstrappers'.

#Authorize kubelet to create CSR by mapping the clusterrole 'system:node-bootstrapper' to the group 'system:bootstrappers' or 'system:bootstrappers:joshuazhang2c.mylabserver.com'
kubectl create clusterrolebinding create-csrs-for-bootstrapping --group=system:bootstrappers:kubeadm:default-node-token --clusterrole=system:node-bootstrapper

#Auto approve all CSRs by mapping the clusterrole 'nodeclient' to the group "system:bootstrappers" or 'system:bootstrappers:joshuazhang2c.mylabserver.com'
kubectl create clusterrolebinding auto-approve-csrs-for-group --group=system:bootstrappers:kubeadm:default-node-token --clusterrole=system:certificates.k8s.io:certificatesigningrequests:nodeclient

#Auto approve renewal CSRs by mapping the clusterrole 'selfnodeclient' to the group "system:nodes"
kubectl create clusterrolebinding auto-approve-renewals-csrs-for-group --group=system:nodes --clusterrole=system:certificates.k8s.io:certificatesigningrequests:selfnodeclient

So we can use the following CertificateSigningRequest to self-test it. 

openssl genrsa -out joshuazhang2c.mylabserver.com.key
openssl req -new -key joshuazhang2c.mylabserver.com.key -out joshuazhang2c.mylabserver.com.csr -subj "/CN=system:node:joshuazhang2c.mylabserver.com/O=system:nodes"
openssl x509 -req -in joshuazhang2c.mylabserver.com.csr -CA /etc/kubernetes/pki/ca.crt -CAkey /etc/kubernetes/pki/ca.key -CAcreateserial -out joshuazhang2c.mylabserver.com.crt -days 45
cat <<EOF | kubectl apply -f -
apiVersion: certificates.k8s.io/v1beta1
kind: CertificateSigningRequest
metadata:
  name: system:node:joshuazhang2c.mylabserver.com
spec:
  groups:
    - system:nodes
  request: $(cat joshuazhang2c.mylabserver.com.csr | base64 | tr -d '\n')
  usages:
    - key encipherment
    - digital signature
    - client auth
EOF
kubectl get csr 
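
Once the CSR above has been approved (automatically by the bindings above, or manually with 'kubectl certificate approve'), the issued certificate can be pulled out of the CSR status for inspection, e.g.:

kubectl get csr system:node:joshuazhang2c.mylabserver.com -o jsonpath='{.status.certificate}' | base64 -d > signed.crt
openssl x509 -noout -subject -dates -in signed.crt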

c, Generate kubeconfig with bootstrap token and ca
ENDPOINT=$(kubectl describe service kubernetes |grep -i endpoints |awk '{print $2}')
CLUSTER=$(kubectl config view |grep '  cluster:' |awk '{print $2}')
TOKEN_ID=$(kubectl get secrets --namespace=kube-system bootstrap-token-iate9c -o jsonpath='{.data.token-id}' |base64 -d)
TOKEN_SECRET=$(kubectl get secrets --namespace=kube-system bootstrap-token-iate9c -o jsonpath='{.data.token-secret}' |base64 -d)
sudo kubectl config --kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf set-cluster ${CLUSTER} --server=https://${ENDPOINT} --certificate-authority=/etc/kubernetes/pki/ca.crt
sudo kubectl config --kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf set-credentials kubelet-bootstrap --token=${TOKEN_ID}.${TOKEN_SECRET}
sudo kubectl config --kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf set-context bootstrap --user=kubelet-bootstrap --cluster=${CLUSTER}
sudo kubectl config --kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf use-context bootstrap

# Copy the following files from master to the new node
scp joshuazhang1c.mylabserver.com:/etc/kubernetes/bootstrap-kubelet.conf . && sudo mv bootstrap-kubelet.conf /etc/kubernetes/
scp joshuazhang1c.mylabserver.com:/etc/kubernetes/pki/ca.crt . && sudo mv ca.crt /etc/kubernetes/pki/
scp joshuazhang1c.mylabserver.com:/var/lib/kubelet/config.yaml . && sudo mv config.yaml /var/lib/kubelet/

d, Install kubelet on the new node joshuazhang2c.mylabserver.com

sudo apt update && sudo apt install -y apt-transport-https curl
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo bash -c 'cat >/etc/apt/sources.list.d/kubernetes.list' <<EOF
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
sudo apt update
sudo apt install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
cat /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
sudo apt install -y docker.io
sudo systemctl enable docker.service

sudo bash -c 'cat >/lib/systemd/system/kubelet.service' <<EOF
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/home/
After=docker.service
Requires=docker.service
[Service]
#ExecStart=/usr/bin/kubelet --bootstrap-kubeconfig="/etc/kubernetes/bootstrap-kubelet.conf" --kubeconfig="/etc/kubernetes/kubeconfig.conf" --config="/var/lib/kubelet/config.yaml"
ExecStart=/usr/bin/kubelet --bootstrap-kubeconfig="/etc/kubernetes/bootstrap-kubelet.conf" --kubeconfig="/etc/kubernetes/kubeconfig.conf" --pod-manifest-path="/etc/kubernetes/manifests/" --feature-gates=RotateKubeletClientCertificate=true
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload; sudo systemctl restart kubelet
sudo systemctl status kubelet

# Verify
kubectl get nodes joshuazhang2c.mylabserver.com
# kubectl get csr
NAME                                                   AGE   REQUESTOR                 CONDITION
node-csr-mdou0axSg2vlk5wx2_a1uA0-buvaC-PsiF69Jvjg110   87s   system:bootstrap:iate9c   Approved,Issued
# ls /var/lib/kubelet/pki/
kubelet-client-2018-12-04-08-50-51.pem  kubelet-client-current.pem  kubelet.crt  kubelet.key
# openssl x509 -noout -text -in /var/lib/kubelet/pki/kubelet-client-current.pem |grep system:node
        Subject: O = system:nodes, CN = system:node:joshuazhang4c.mylabserver.com

Appendix - RBAC authentication

kubectl create ns development
kubectl create ns production
$ kubectl config get-contexts
CURRENT   NAME           CLUSTER        AUTHINFO   NAMESPACE
*         juju-context   juju-cluster   admin

sudo useradd -s /bin/bash DevHua
sudo passwd DevHua

# Generate a private key, then Certificate Signing Request (CSR) for DevHua
openssl genrsa -out DevHua.key
openssl req -new -key DevHua.key -out DevHua.csr -subj "/CN=DevHua/O=development"
# Using the newly created request, generate a certificate signed by the cluster CA
openssl x509 -req -in DevHua.csr -CA /etc/kubernetes/pki/ca.crt -CAkey /etc/kubernetes/pki/ca.key -CAcreateserial -out DevHua.crt -days 45

kubectl config view
kubectl config set-credentials --help
kubectl config set-credentials DevHua --client-certificate=./DevHua.crt --client-key=./DevHua.key
kubectl config set-context --help
kubectl config set-context DevHua-context --cluster=juju-cluster --namespace=development --user=DevHua
kubectl --context=DevHua-context get pods
#kubectl config use-context DevHua-context
kubectl config get-contexts
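
'kubectl auth can-i' is a quick way to confirm that DevHua has no permissions yet, and to re-check after the Role/RoleBinding below are applied:

kubectl --context=DevHua-context auth can-i list pods --namespace=development  # "no" before the RoleBinding, "yes" after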

sudo bash -c 'cat > role-dev.yaml' <<EOF
kind: Role
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  namespace: development
  name: developer
rules:
- apiGroups: ["", "extensions", "apps"]
  resources: ["deployments", "replicasets", "pods"]
  verbs: ["list", "get", "watch", "create", "update", "patch", "delete"]
EOF
kubectl create -f role-dev.yaml
kubectl -n development describe roles developer

sudo bash -c 'cat > rolebind.yaml' <<EOF
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: developer-role-binding
  namespace: development
subjects:
  - kind: User
    name: DevHua
    apiGroup: ""
roleRef:
  kind: Role
  name: developer
  apiGroup: ""
EOF
kubectl apply -f rolebind.yaml
kubectl -n development describe rolebinding developer-role-binding

kubectl --context=DevHua-context run nginx --image=nginx
kubectl --context=DevHua-context get pods
kubectl --context=DevHua-context delete deploy nginx

sudo bash -c 'cat > adminrolebind.yaml' <<EOF
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: developer-adminrole-binding
  namespace: development
subjects:
  - kind: User
    name: DevHua
    apiGroup: ""
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: ""
EOF
kubectl apply -f adminrolebind.yaml
kubectl --context=DevHua-context get pods

kubectl apply -f role-prod.yaml
vim role-prod.yaml
kind: Role
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  namespace: production #<<- This line
  name: dev-prod #<<- and this line
rules:
- apiGroups: ["", "extensions", "apps"]
  resources: ["deployments", "replicasets", "pods"]
  verbs: ["get", "list", "watch"] #<<- and this one

kubectl apply -f rolebindprod.yaml
vim rolebindprod.yaml
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: production-role-binding
  namespace: production
subjects:
- kind: User
  name: DevDan
  apiGroup: ""
roleRef:
  kind: Role
  name: dev-prod
  apiGroup: ""

kubectl config set-context ProdHua-context --cluster=juju-cluster --namespace=production --user=DevHua
kubectl --context=ProdHua-context run nginx --image=nginx

Appendix - RBAC authentication in Dashboard

# Use default anonymous user
# generate client-certificate-data
grep 'client-certificate-data' /var/run/kubernetes/admin.kubeconfig | head -n 1 | awk '{print $2}' | base64 -d >> kubecfg.crt
# generate client-key-data
grep 'client-key-data' /var/run/kubernetes/admin.kubeconfig | head -n 1 | awk '{print $2}' | base64 -d >> kubecfg.key
# generate p12
openssl pkcs12 -export -clcerts -inkey kubecfg.key -in kubecfg.crt -out kubecfg.p12 -name "kubernetes-client"

kubectl get secret -n kube-system | grep dashboard
kubectl -n kube-system get secret kubernetes-dashboard-token-kglhd -o jsonpath={.data.token}| base64 -d

# Use admin user
cat > /tmp/admin-user.yaml <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin
  namespace: kube-system
EOF
cat > /tmp/admin-user-role-binding.yaml <<EOF
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: admin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin
  namespace: kube-system
EOF
kubectl create -f /tmp/admin-user.yaml
kubectl create -f /tmp/admin-user-role-binding.yaml
kubectl -n kube-system describe secret $(kubectl -n kube-system get secret | grep admin | awk '{print $1}')

Appendix - TLS bootstrapping

https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
https://www.codercto.com/a/23740.html
Workers must use a certificate issued by the masters to communicate with them. To avoid creating certificates by hand each time a worker is added, the kubelet on the worker uses a predefined bootstrap kubeconfig (bootstrap-kubelet.conf) to ask the masters to issue a certificate for this worker dynamically.
kubelet has two ports: 10250 provides the read/write TLS API, and 10255 provides the read-only non-TLS API.
Bootstrap Token Secret (kubectl describe secrets --namespace=kube-system bootstrap-signer-token-8xsmh) will replace the previous token.csv.

kube-apiserver side receives the requests for certificates from the kubelet and authenticates those requests:
a, Recognizing CA that signs the client certificate
   kube-apiserver --client-ca-file=/etc/kubernetes/pki/ca.crt --enable-bootstrap-token-auth=true ...
b, Authenticating the bootstrapping kubelet to the system:bootstrappers group
# Create Bootstrap Token
echo "$(head -c 6 /dev/urandom | md5sum | head -c 6)"."$(head -c 16 /dev/urandom | md5sum | head -c 16)"
vdb9xb.jiqhz35y355g1ngx
vdb9xb.jiqhz35y355g1ngx,kubelet-bootstrap,10001,"system:bootstrappers"  #token.csv
c, Authorize the bootstrapping kubelet to create a certificate signing request (CSR)
kubectl describe roles.rbac.authorization.k8s.io --namespace=kube-system system:controller:bootstrap-signer
sudo bash -c 'cat > rolebinding.yaml' <<EOF
# enable bootstrapping nodes to create CSR
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: create-csrs-for-bootstrapping
subjects:
- kind: Group
  name: system:bootstrappers
  apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: system:node-bootstrapper
  apiGroup: rbac.authorization.k8s.io
EOF

kube-controller-manager side is responsible for issuing actual signed certificates:
a, access to the "kubernetes CA key and certificate" that you created and distributed
kube-controller-manager --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt --cluster-signing-key-file=/etc/kubernetes/pki/ca.key ...
b, approve CSR signing automatically
sudo bash -c 'cat > certificatesigningrequests.yaml' <<EOF
# Approve all CSRs for the group "system:bootstrappers"
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: auto-approve-csrs-for-group
subjects:
- kind: Group
  name: system:bootstrappers
  apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: system:certificates.k8s.io:certificatesigningrequests:nodeclient
  apiGroup: rbac.authorization.k8s.io
EOF
sudo bash -c 'cat > renewal.yaml' <<EOF
# Approve renewal CSRs for the group "system:nodes"
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: auto-approve-renewals-for-nodes
subjects:
- kind: Group
  name: system:nodes
  apiGroup: rbac.authorization.k8s.io
roleRef:
  kind: ClusterRole
  name: system:certificates.k8s.io:certificatesigningrequests:selfnodeclient
  apiGroup: rbac.authorization.k8s.io
EOF
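
The three YAML files written above still have to be applied on the master, for example:

kubectl apply -f rolebinding.yaml
kubectl apply -f certificatesigningrequests.yaml
kubectl apply -f renewal.yaml
kubectl get clusterrolebindings |grep -E 'create-csrs|auto-approve'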

kubelet side:
kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf ...
# cat /etc/kubernetes/bootstrap-kubelet.conf
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: [xxx]
    server: https://172.31.43.252:6443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: tls-bootstrap-token-user
  name: tls-bootstrap-token-user@kubernetes
current-context: tls-bootstrap-token-user@kubernetes
kind: Config
preferences: {}
users:
- name: tls-bootstrap-token-user
  user:
    token: vdb9xb.jiqhz35y355g1ngx

In Summary:
kubectl get secrets -n kube-system |grep -i bootstrap
kubectl -n kube-system get secret bootstrap-signer-token-8xsmh -o jsonpath={.data.token}| base64 -d

Appendix - microk8s

https://github.com/zhhuabj/exercise/blob/master/container/microk8s.txt
sudo snap install microk8s --edge --classic
snap list
journalctl -u snap.microk8s.daemon-apiserver.service
sudo usermod -a -G microk8s $USER && sudo chown -f -R $USER ~/.kube && newgrp microk8s  #avoid using ‘sudo’ for microk8s command

#Failed to pull image "k8s.gcr.io/kubernetes-dashboard-amd64:v1.10.1" due to gfw - https://microk8s.io/docs/install-proxy 
#echo 'forward-socks5  / 192.168.99.1:7070 .' |sudo tee -a /etc/privoxy/config
#sudo systemctl restart privoxy
#echo 'HTTPS_PROXY=http://127.0.0.1:8118' |sudo tee -a /var/snap/microk8s/current/args/containerd-env
#sudo /snap/bin/microk8s.stop && sudo /snap/bin/microk8s.start
#or change to use - https://github.com/canonical/microk8s/issues/472 
sudo sed -i "s#k8s.gcr.io#registry.cn-hangzhou.aliyuncs.com/google_containers#g" /var/snap/microk8s/current/args/containerd-template.toml
sudo systemctl restart snap.microk8s.daemon-containerd.service
cat /var/snap/microk8s/3629/args/containerd.toml

sudo /snap/bin/microk8s.kubectl config view --raw > ~/.kube/config
microk8s.kubectl get no
microk8s.kubectl get all --all-namespaces
microk8s.kubectl get pods --all-namespaces
microk8s.enable dns dashboard
microk8s.kubectl describe pod kubernetes-dashboard-7d75c474bb-rtzm5 --namespace=kube-system
token=$(microk8s.kubectl -n kube-system get secret | grep default-token | cut -d " " -f1)
microk8s.kubectl -n kube-system describe secret $token
microk8s.kubectl cluster-info
lynx http://xxxx

cat /var/snap/microk8s/current/args/kube*
#https://blog.csdn.net/quqi99/article/details/81032421 
sudo /snap/bin/microk8s.ctr --namespace k8s.io image ls
sudo /snap/bin/microk8s.ctr --namespace k8s.io containers ls

alias kubectl='sudo /snap/bin/microk8s.kubectl'

Appendix - Miscellaneous

# How to generate yaml template
kubectl run --restart=Always # creates a Deployment
kubectl run --restart=Never # creates bare pod
kubectl run --restart=OnFailure # creates a Job.

# template
pod template:  kubectl run --generator='run-pod/v1' nginx --image=nginx --dry-run=true -o yaml
service and deployment template: kubectl run nginx --service-generator='service/v2' --image=nginx --dry-run=true --expose --port 80 -o yaml
job template: kubectl run --generator=job/v1 nginx --image=nginx --dry-run=true -o yaml
# or use
kubectl create deployment nginx --image nginx --dry-run=true -o yaml
kubectl create job nginx --image nginx --dry-run=true -o yaml

# jq, jsonpath, sort-by, kubectl top etc
kubectl delete pods,services -l name=myLabel --include-uninitialized
kubectl get pods --field-selector=status.phase=Running
kubectl get pod ubuntu -o yaml |sed 's/\(image: ubuntu\):.*$/\1:18.04/' |kubectl replace -f -
kubectl top pod -l name=nginx-ingress-kubernetes-worker
kubectl get pods --sort-by=.metadata.name

kubectl get -o template pod/web-pod-13je7 --template={{.status.phase}}

kubectl get nodes -o jsonpath='{.items[*].metadata.name}' equals kubectl get nodes -o jsonpath='{.items..metadata.name}'
kubectl get nodes -o jsonpath='{.items[].metadata.name}' equals kubectl get nodes -o jsonpath='{.items[0].metadata.name}'
kubectl get nodes -o jsonpath='{.items[*].status.addresses[?(@.type=="InternalIP")].address}'

kubectl get pods -o json |jq '.items[].spec.containers[].env[]?.valueFrom.secretKeyRef.name' |grep -v null |sort |uniq

# just get pod's names
kubectl get pod -l app=nginx -o json |jq '.items[].metadata.name'
kubectl get pods -l app=nginx -o=custom-columns=NAME:.metadata.name
kubectl get pods -l app=nginx -o=name

# don't forget --record
#kubectl rollout pause deployment/scale-deploy
#kubectl set resources deployment/scale-deploy -c=nginx --limits=cpu=200m,memory=512Mi
#kubectl rollout resume deployment/scale-deploy
#output: deployment.apps/nginx-deployment resource requirements updated
kubectl set image deploy scale-deploy nginx=nginx:1.9.1 --record
kubectl rollout history deployment/scale-deploy
kubectl rollout history deployment/scale-deploy --revision=1
kubectl rollout undo deployment/scale-deploy
kubectl rollout undo deployment/scale-deploy --to-revision=2
kubectl scale deployment/scale-deploy --replicas=2
kubectl autoscale deployment/scale-deploy --min=3 --max=4 --cpu-percent=80
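
The HPA created by 'kubectl autoscale' takes the deployment's name by default, so its state can be checked with, e.g.:

kubectl get hpa scale-deploy
kubectl describe hpa scale-deploy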

# volume template
cat > test_pod.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: test-pod
spec:
  #initContainers:
  containers:
  - image: nginx
    name: nginx
    volumeMounts:
    - mountPath: /test
      name: secret-volume
    env:
    - name: PASS
      valueFrom:
        secretKeyRef:
          name: test-secret
          key: passwd
  volumes:
  - name: hostpath-volume
    hostPath:
      path: /data
  - name: emptydir-volume
    emptyDir: {}
  - name: secret-volume
    secret:
      secretName: test-secret
EOF
kubectl create --save-config -f test_pod.yaml
kubectl apply --record -f test_pod.yaml

# initcontainers
apiVersion: v1
kind: Pod
metadata:
  name: init-pod
spec:
  containers:
  - name: nginx
    image: nginx
    volumeMounts:
    - name: workdir
      mountPath: /usr/share/nginx/html
  initContainers:
  - name: touch
    image: busybox:1.28
    command: ['touch', '/work-dir/index.html']
    volumeMounts:
    - name: workdir
      mountPath: "/work-dir"
  volumes:
  - name: workdir
    emptyDir: {}
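
A hedged way to try the initContainers example above (the file name init-pod.yaml is an assumption): save the manifest, create the pod, and check that the init container produced the file before nginx started:

kubectl apply -f init-pod.yaml
kubectl get pod init-pod
kubectl exec init-pod -- ls /usr/share/nginx/html/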

# Create a pod that uses secrets
kubectl create secret generic test-secret --from-literal=username=hua --from-literal=passwd=password --dry-run -o yaml
kubectl create secret generic test-secret --from-literal=username=hua --from-literal=passwd=password
kubectl get pods --namespace=kube-system kube-flannel-ds-amd64-4mt82 -o yaml > pod_template.yaml
cat > pod-secret.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: question1
  name: pod-secret
spec:
  containers:
  - image: nginx
    name: nginx
    volumeMounts:
    - mountPath: /mnt/secret
      name: test-secret-vol
  volumes:
  - name: test-secret-vol
    secret:
      secretName: test-secret
EOF
kubectl exec pod-secret -- cat /mnt/secret/passwd
cat > pod_secret_env.yaml <<EOF
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: question1
  name: pod-secret-env
spec:
  containers:
  - image: nginx
    name: nginx
    env:
    - name: PASS
      valueFrom:
        secretKeyRef:
          name: test-secret
          key: passwd
EOF
kubectl exec pod-secret-env -- env |grep PASS

# etcd 3
ETCDCTL_API=3 etcdctl --help |grep snap
ETCDCTL_API=3 etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/apiserver-etcd-client.crt --key=/etc/kubernetes/pki/apiserver-etcd-client.key member list
ETCDCTL_API=3 etcdctl --endpoints=https://[127.0.0.1]:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/apiserver-etcd-client.crt --key=/etc/kubernetes/pki/apiserver-etcd-client.key snapshot save snapshot.db
etcdctl --endpoints=https://[127.0.0.1]:2379 --ca-file=/etc/kubernetes/pki/etcd/ca.crt --cert-file=/etc/kubernetes/pki/apiserver-etcd-client.crt --key-file=/etc/kubernetes/pki/apiserver-etcd-client.key cluster-health
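
For completeness, a snapshot taken above can later be checked and restored with the etcdctl v3 snapshot subcommands (a sketch; the restore data dir below is an assumption, and etcd/kube-apiserver must be stopped and repointed to it):

ETCDCTL_API=3 etcdctl snapshot status snapshot.db
ETCDCTL_API=3 etcdctl snapshot restore snapshot.db --data-dir /var/lib/etcd-restore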

# PV & PVC & Pod
cat > pv.yaml <<EOF
# pv + hostpath
kind: PersistentVolume
apiVersion: v1
metadata:
  name: task-pv-volume
  labels:
    type: local
spec:
  storageClassName: manual
  capacity:
    storage: 2Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: "/mnt/data"
EOF
cat > pvc.yaml <<EOF
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: task-pv-claim
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
EOF
cat > pv-pod.yaml <<EOF
kind: Pod
apiVersion: v1
metadata:
  name: task-pv-pod
spec:
  volumes:
    - name: task-pv-storage
      persistentVolumeClaim:
       claimName: task-pv-claim
  containers:
    - name: task-pv-container
      image: nginx
      ports:
        - containerPort: 80
          name: "http-server"
      volumeMounts:
        - mountPath: "/usr/share/nginx/html"
          name: task-pv-storage
EOF
# NOTE: the following command should be run on the node that hosts the pod (it writes to the hostPath directory)
echo 'hello' > /mnt/data/index.html
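
A hedged end-to-end check of the PV/PVC/Pod above (file names follow the snippets above):

kubectl apply -f pv.yaml -f pvc.yaml -f pv-pod.yaml
kubectl get pv,pvc                  # the claim should show Bound
kubectl exec task-pv-pod -- cat /usr/share/nginx/html/index.html   # prints 'hello'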


# custom install master (TBD)
# install kube* and etcd binary - https://kubernetes.io/docs/setup/scratch/
wget https://github.com/kubernetes/kubernetes/releases/download/v1.13.0/kubernetes.tar.gz
tar -xf kubernetes.tar.gz
./kubernetes/cluster/get-kube-binaries.sh
tar -xf kubernetes/server/kubernetes-server-linux-amd64.tar.gz
sudo cp kubernetes/server/bin/{kube-apiserver,kube-scheduler,kube-controller-manager,kube-proxy,kubectl,kubelet} /usr/bin/
wget https://github.com/etcd-io/etcd/releases/download/v3.3.10/etcd-v3.3.10-linux-amd64.tar.gz
tar -xf etcd-v3.3.10-linux-amd64.tar.gz
sudo cp etcd-v3.3.10-linux-amd64/{etcd,etcdctl} /usr/bin/

# create cert
sudo -i
mkdir -p /etc/kubernetes && cd /etc/kubernetes
openssl genrsa -out ca.key
openssl req -x509 -new -nodes -key ca.key -subj "/CN=quqi.cluster" -days 5000 -out ca.crt
# openssl x509 -in ca.crt -out ca.pem  #convert CRT to PEM

#Create key pair for kube-master. NOTE: the kube-master CN should match its hostname
openssl genrsa -out server.key
openssl req -new -key server.key -out server.csr -subj "/CN=system:node:172.31.20.224/O=system:nodes"
openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 5000
openssl x509  -noout -text -in ./server.crt

# Create key pair for every kube-worker, here we will just create one for all-workers
openssl genrsa -out client.key
openssl req -new -key client.key -out client.csr -subj "/CN=system:node:worker/O=system:nodes"
openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 5000
openssl x509  -noout -text -in ./client.crt

# static pod way 
sudo apt install -y docker.io
sudo mkdir -p /etc/kubernetes/manifests
sudo kubelet --register-node=false --pod-manifest-path=/etc/kubernetes/manifests
https://medium.com/containerum/4-ways-to-bootstrap-a-kubernetes-cluster-de0d5150a1e4

# systemd way - https://medium.com/containerum/4-ways-to-bootstrap-a-kubernetes-cluster-de0d5150a1e4
git clone https://github.com/kubernetes/contrib.git
cd ./contrib/init/systemd/
sudo useradd kube
sudo mv *.service /etc/systemd/system/
sudo mv ./environ/* /etc/kubernetes/
sudo mkdir -p  /var/run/kubernetes
sudo systemctl enable kube-apiserver
sudo systemctl restart kube-apiserver
...

# install etcd - https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/
sudo touch /var/log/etcd.log && sudo chown -R $(id -u) /var/log/etcd.log
sudo etcd --listen-client-urls=http://0.0.0.0:2379 --advertise-client-urls=http://172.31.20.224:2379 >> /var/log/etcd.log 2>&1 &
sudo ETCDCTL_API=2 etcdctl --endpoints=http://172.31.20.224:2379 cluster-health 
sudo ETCDCTL_API=3 etcdctl --endpoints=http://172.31.20.224:2379 member list
sudo ETCDCTL_API=3 etcdctl --endpoints=http://172.31.20.224:2379 snapshot save snapshot.save

# run kube-apiserver
sudo touch /var/log/kube-apiserver.log && sudo chown -R $(id -u) /var/log/kube-apiserver.log
#sudo kube-apiserver --logtostderr --v=0 --etcd-servers=http://172.31.20.224:2379 --insecure-bind-address=0.0.0.0 --insecure-port=8080 --service-cluster-ip-range=10.244.0.0/16 --admission-control=ServiceAccount,LimitRanger,ResourceQuota --bind-address=0.0.0.0 --secure-port=6443 --client-ca-file=/etc/kubernetes/ca.crt --tls-private-key-file=/etc/kubernetes/server.key --tls-cert-file=/etc/kubernetes/server.crt >> /var/log/kube-apiserver.log 2>&1 &
sudo kube-apiserver --logtostderr --v=0 --etcd-servers=http://172.31.20.224:2379 --insecure-bind-address=0.0.0.0 --insecure-port=8080 --service-cluster-ip-range=10.244.0.0/16 --admission-control=ServiceAccount,LimitRanger,ResourceQuota >> /var/log/kube-apiserver.log 2>&1 &

# run kube-controller-manager
sudo touch /var/log/kube-controller-manager.log && sudo chown -R $(id -u) /var/log/kube-controller-manager.log
#sudo kube-controller-manager --logtostderr --v=0 --master=https://172.31.20.224:6443 --service-account-private-key-file=/etc/kubernetes/server.key --root-ca-file=/etc/kubernetes/ca.crt
sudo kube-controller-manager --logtostderr --v=0 --master=http://172.31.20.224:8080  >> /var/log/kube-controller-manager.log 2>&1 &

# run kube-scheduler
sudo touch /var/log/kube-scheduler.log && sudo chown -R $(id -u) /var/log/kube-scheduler.log
sudo kube-scheduler --logtostderr --v=0 --master=http://172.31.20.224:8080  >> /var/log/kube-scheduler.log 2>&1 &

# verify master
kubectl -s http://172.31.20.224:8080 get componentstatus
kubectl -s http://172.31.20.224:8080 get node

# run kube-proxy, docker and kubelet
sudo touch /var/log/kube-proxy.log && sudo chown -R $(id -u) /var/log/kube-proxy.log
sudo kube-proxy --logtostderr --v=0 --master=http://172.31.20.224:8080  >> /var/log/kube-proxy.log 2>&1 &

sudo apt install docker.io
sudo systemctl enable docker

kubectl config view
kubectl config set-credentials admin --username=admin --password=password
kubectl config set-cluster quqi.cluster --insecure-skip-tls-verify=true --server=http://172.31.20.224:8080
kubectl config set-context quqi.context --user=admin --namespace=default --cluster=quqi.cluster
kubectl config use-context quqi.context
sudo cp .kube/config /etc/kubernetes/kubeconfig

sudo touch /var/log/kubelet.log && sudo chown -R $(id -u) /var/log/kubelet.log
sudo kubelet --logtostderr --v=0 --kubeconfig=/etc/kubernetes/kubeconfig  >> /var/log/kubelet.log 2>&1 &

# self-hosted way
# create kubeconfig
NOTE: Put the kubeconfig(s) on every node. eg: in /var/lib/kube-proxy/kubeconfig and /var/lib/kubelet/kubeconfig.
CLUSTER_NAME=quqicluster
CA_CERT=/etc/kubernetes/pki/ca.crt
CLI_CERT=/etc/kubernetes/pki/client.crt
CLI_KEY=/etc/kubernetes/pki/client.key
TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/[:space:]" | dd bs=32 count=1 2>/dev/null)
USER=admin
CONTEXT_NAME=admin_context
MASTER_IP=172.31.29.147
sudo kubectl config set-cluster $CLUSTER_NAME --certificate-authority=$CA_CERT --embed-certs=true --server=https://$MASTER_IP
sudo kubectl config set-credentials $USER --client-certificate=$CLI_CERT --client-key=$CLI_KEY --embed-certs=true --token=$TOKEN
sudo kubectl config set-context $CONTEXT_NAME --cluster=$CLUSTER_NAME --user=$USER
sudo kubectl config use-context $CONTEXT_NAME

# install docker
#iptables -t nat -F
#ip link set docker0 down
#ip link delete docker0
sudo apt install -y docker.io
sudo systemctl enable docker

# Install kubelet
sudo mkdir -p /var/lib/kubelet
sudo cp ~/.kube/config /var/lib/kubelet/kubeconfig
sudo kubelet --kubeconfig=/var/lib/kubelet/kubeconfig

kubelet --kubeconfig=/var/lib/kubelet/kubeconfig --cgroup-driver=cgroupfs --network-plugin=cni --pod-infra-container-image=k8s.gcr.io/pause:3.1

# Install kube-proxy
sudo mkdir -p /var/lib/kube-proxy
sudo cp ~/.kube/config /var/lib/kube-proxy/kubeconfig
sudo kube-proxy --master=https://$MASTER_IP --kubeconfig=/var/lib/kube-proxy/kubeconfig

Update 2021-11-03 - Installing metrics-server

The problem: among the memory-related k8s metrics only workingSetBytes is present, which seems incomplete.

"memory": {
"time": "2021-10-04T12:06:58Z",
"workingSetBytes": 63426560
}

Some theory:
When the metrics API concept was proposed, the official docs also proposed a new monitoring architecture, in which monitored resources are divided into two kinds:

  • Core metrics: fetched from kubelet, cAdvisor, etc., and then served by metrics-server to consumers such as the Dashboard and the HPA controller. Core metrics only cover node and pod CPU, memory, etc.; in general they are enough for HPA, but if you want HPA driven by custom metrics such as request QPS or 5xx error counts, you need custom metrics.
  • Custom metrics: served through the custom.metrics.k8s.io API by the Prometheus Adapter, which supports any metric scraped by Prometheus. Currently custom metrics in Kubernetes are usually provided by Prometheus and aggregated into the apiserver by k8s-prometheus-adapter, achieving the same effect as the core metrics (metrics-server).
Data collection uses cAdvisor (already integrated into kubelet)
docker stats can show a container's runtime state, but it cannot be queried over HTTP and has no GUI, which is why cAdvisor was born. cAdvisor not only collects information about all containers running on a machine, it also provides a basic query UI and an HTTP endpoint that Prometheus can scrape.
The k8s kubelet integrates cAdvisor (./pkg/kubelet/cadvisor, https://github.com/kubernetes/community/blob/master/contributors/design-proposals/instrumentation/monitoring_architecture.md). In k8s v1.13 the cAdvisor methods mainly called by kubelet are: MachineInfo, RootFsInfo, VersionInfo, GetDirFsInfo, GetFsInfo, ContainerInfoV2, SubcontainerInfo, ContainerInfo, WatchEvents.
The cAdvisor architecture is an event mechanism with two layers, an event-listening layer and an event-handling layer. The listening layer handles ContainerAdd/ContainerDelete events (watchForNewContainers starts a rawWatcher that watches the cgroup root directory via go-inotify) and EventOomKill events (watchForNewOoms reads /dev/kmsg strings via the go-kmsg-parser library). In the handling layer, eventHandler does the processing; its core data structure is events.watchers, which maintains a set of watches, each storing a channel and a request.
The cAdvisor inside kubelet does not expose port 4194 externally, so monitoring metrics can only be obtained through the proxy API provided by the apiserver.
cAdvisor metrics path: /api/v1/nodes/[NODE]/proxy/metrics/cadvisor
kubelet metrics path: /api/v1/nodes/[NODE]/proxy/metrics
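
Both paths can be reached through the apiserver proxy with 'kubectl get --raw', for example:

NODE=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
kubectl get --raw /api/v1/nodes/${NODE}/proxy/metrics/cadvisor | head
kubectl get --raw /api/v1/nodes/${NODE}/proxy/metrics | head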

Data aggregation uses metrics-server (called heapster before v1.8)
metrics-server periodically calls the kubelet summary API to fetch metrics from each node, then aggregates them and stores them in memory; only the latest state is kept, so the data is lost once the component restarts.
metrics-server consists of two main parts, API and Server. The API part exposes pod resource usage through the APIServer to consumers such as HPA, kubectl top and the Kubernetes dashboard.

What is metricbeat? In k8s, metricbeat collects k8s-related performance metrics, Elasticsearch stores and indexes the data, and Kibana visualizes it.
What is cAdvisor?
How does Prometheus monitor container memory?
container_memory_rss: RSS, the Resident Set Size, is the physical memory actually allocated to the process rather than virtual memory cached on disk. It includes all allocated stack and heap memory plus the memory of shared libraries loaded into physical memory, but excludes memory that has been swapped out.
container_memory_working_set_bytes: the memory in use by the container; it better reflects real memory usage and is also the metric used by the OOM killer (recommended).
container_spec_memory_limit_bytes: the container's memory usage limit.

Environment setup: first install a k8s environment with kubeadm as in the first section, then install metrics-server with the following command:

kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
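
After the deployment settles, the aggregated API can be checked roughly like this (in lab environments metrics-server may additionally need the --kubelet-insecure-tls flag, depending on your setup):

kubectl -n kube-system get pods -l k8s-app=metrics-server
kubectl get apiservices v1beta1.metrics.k8s.io
kubectl top nodes
kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes | jq .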

Then build and install the patch:

#https://golang.org/doc/install
GO_VERSION=1.17.2
GO_ARCH=linux-amd64
curl -o go.tgz https://dl.google.com/go/go${GO_VERSION}.${GO_ARCH}.tar.gz
sudo rm -rf /usr/lib/go && sudo tar -C /usr/lib -xzf go.tgz
go version
export GOROOT=/usr/lib/go
export GOPATH=/bak/golang
export PATH=$GOROOT/bin:$GOPATH/bin:$PATH

mkdir -p $GOPATH/src/k8s.io
cd $GOPATH/src/k8s.io
git clone https://github.com/kubernetes/kubernetes
cd kubernetes
#https://github.com/kubernetes/kubernetes/commit/545d8985844f6b054319db8545cb9892148e2955
git format-patch -1 545d8985844f6b054319db8545cb9892148e2955
git checkout -b v1.22.0 v1.22.0
patch -p1 < 0001-Extract-containerID-from-systemd-style-cgroupPath-in.patch
make kubelet
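
To test the patched build, one hedged approach is to swap the kubelet binary on the node and restart the service (paths assume the build tree used above and a deb-installed kubelet in /usr/bin):

sudo systemctl stop kubelet.service
sudo cp _output/bin/kubelet /usr/bin/kubelet
sudo systemctl start kubelet.service
kubelet --version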

Reproduce the bug:

curl -qk -H "Authorization:Bearer $(kubectl -n kube-system get secret metrics-server-token-7tbxf -o jsonpath={.data.token} | base64 -d)" -k https://127.0.0.1:10250/stats/summary 2>/dev/null| jq .pods[0].containers[0].memory

Some containerd CLI examples:

sudo ctr namespaces ls
sudo ctr --namespace k8s.io image ls
sudo ctr --namespace k8s.io containers ls
#sudo ctr --namespace k8s.io t exec --exec-id 0  -t test sh
#just for k8s env (k8s has CRI interface so it has crictl CLI as well)
sudo crictl ps
sudo crictl images
sudo crictl stats
sudo crictl pods
sudo crictl pods --name kube-apiserver-juju-10ee00-case320434-0 -v
sudo crictl exec -it 50365fbc248f2 /bin/bash

# to avoid 'You have reached your pull rate limit'
sed -i 's/registry-1.docker.io/docker.mirrors.ustc.edu.cn/g' /etc/containerd/config.toml
systemctl restart containerd 
ctr images pull docker.io/library/nginx:alpine
ctr run docker.io/library/nginx:alpine test --rm
ctr containers ls

Update 2021-11-05 - How to debug kubelet

Unit tests

# test a module
go test -v k8s.io/kubernetes/pkg/kubelet/stats
# test a single package
alias testcases="sed -n 's/func.*\(Test.*\)(.*/\1/p' | xargs | sed 's/ /|/g'"
go test -v -run $(cat pkg/kubelet/stats/cri_stats_provider_test.go | testcases) k8s.io/kubernetes/pkg/kubelet/stats
# test a single Class
go test -v -run TestExtractIDFromCgroupPath k8s.io/kubernetes/pkg/kubelet/stats
go test -v -run "^TestExtractIDFromCgroupPath$" k8s.io/kubernetes/pkg/kubelet/stats

Step 1: build

#go build -gcflags=all="-N -l" main.go
GO111MODULE=off KUBE_GIT_TREE_STATE=clean make kubelet GOGCFLAGS="all=-N -l" GOLDFLAGS=""
#GOGCFLAGS="-N -l"  # disable compiler optimizations and inlining so that single-stepping in the debugger maps to each actual source line
# GOLDFLAGS=""       # the '-s -w' options in ./hack/lib/golang.sh strip the symbol table, so this restriction must be removed

Step 2: run the dlv server

# go get -v -d github.com/go-delve/delve/cmd/dlv@latest
go install github.com/go-delve/delve/cmd/dlv@latest
ps -ef |grep kubelet && sudo systemctl stop kubelet.service
cd /bak/golang/src/k8s.io/kubernetes
sudo /bak/golang/bin/dlv --headless -l 127.0.0.1:1234 --api-version=2 --accept-multiclient exec sudo /bak/golang/src/k8s.io/kubernetes/_output/bin/kubelet -- --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf --config=/var/lib/kubelet/config.yaml --container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock

Step 3: run the dlv client. 'l' can only show source code after breakpoints have been set (e.g. 'b main.main') and 'c' has been run (if you restart with 'r', set the breakpoints again and run 'c' before 'l'). One big pitfall, and a very important point: you must run 'cd /bak/golang/src/k8s.io/kubernetes' before starting both the dlv server and the dlv client, otherwise the source files will not be found.
The reason is that the dlv source (formatBreakpointLocation -> formatLocation -> formatPath) uses the following code:

func (t *Term) formatPath(path string) string {                                                                                                                                       
    path = t.substitutePath(path)                                               
    workingDir, _ := os.Getwd()                                                 
    return strings.Replace(path, workingDir, ".", 1)                            
}  

For example:

(dlv) config max-string-len 1000
(dlv) set one-variable="aaaa"
(dlv) sources summary.go
(dlv) sources pkg/kubelet/server/stats/summary.go
(dlv) b getCRICadvisorStats
(dlv) b k8s.io/kubernetes/pkg/kubelet/server/stats.(*summaryProviderImpl).Get
Breakpoint 2 set at 0x564cb22341f2 for k8s.io/kubernetes/pkg/kubelet/server/stats.(*summaryProviderImpl).Get() _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/server/stats/summary.go:67
(dlv) c
(dlv) n
(dlv) s
(dlv) so (stepout, step out of the current function, don't use r which means restart)

The code path looks like this:

(dlv) bt                                                                                                                                                                              
 0  0x000055968e4a2e72 in k8s.io/kubernetes/pkg/kubelet/stats.getCRICadvisorStats                                                                                                     
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/stats/cri_stats_provider.go:817                                                                                             
 1  0x000055968e49a935 in k8s.io/kubernetes/pkg/kubelet/stats.(*criStatsProvider).listPodStats                                                                                        
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/stats/cri_stats_provider.go:164                                                                                             
 2  0x000055968e499687 in k8s.io/kubernetes/pkg/kubelet/stats.(*criStatsProvider).ListPodStats                                                                                        
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/stats/cri_stats_provider.go:99                                                                                              
 3  0x000055968e5b4d3b in k8s.io/kubernetes/pkg/kubelet.(*Kubelet).ListPodStats                                                                                                       
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/kubelet.go:1222                                                                                                             
 4  0x000055968c624aba in k8s.io/kubernetes/pkg/kubelet/server/stats.(*summaryProviderImpl).Get                                                                                       
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/server/stats/summary.go:91                                                                                                  
 5  0x000055968c628bec in k8s.io/kubernetes/pkg/kubelet/server/stats.(*resourceAnalyzer).Get
    at <autogenerated>:1
 6  0x000055968c62383c in k8s.io/kubernetes/pkg/kubelet/server/stats.(*handler).handleSummary
    at _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/server/stats/handler.go:159

So the breakpoint can be triggered like this: set the breakpoint first, then run 'c':

(dlv) b k8s.io/kubernetes/pkg/kubelet/server/stats.(*handler).handleSummary
Breakpoint 1 set at 0x55a59918d4cf for k8s.io/kubernetes/pkg/kubelet/server/stats.(*handler).handleSummary() _output/local/go/src/k8s.io/kubernetes/pkg/kubelet/server/stats/handler.go:142
(dlv) c

Then run the following command to trigger it:

curl -qk -H "Authorization:Bearer $(kubectl -n kube-system get secret metrics-server-token-7wdgn -o jsonpath={.data.token} | base64 -d)" -k https://127.0.0.1:10250/stats/summary 2>/dev/null| jq '.pods[0].containers[0].memory'

The reason is that the ListPodStats method above eventually calls the following listPodStatsPartiallyFromCRI:

func (p *criStatsProvider) listPodStatsPartiallyFromCRI(updateCPUNanoCoreUsage bool, containerMap map[string]*runtimeapi.Conta
    ...
    allInfos, err := getCadvisorContainerInfo(p.cadvisor)                                                                      
    caInfos, allInfos := getCRICadvisorStats(allInfos)

        // Fill available stats for full set of required pod stats              
        cs := p.makeContainerStats(stats, container, rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata(), updateCPUNanoCoreUsage)
        p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
                                                                                                      
        caStats, caFound := caInfos[containerID]                                
        if !caFound {                                                           
            klog.V(5).InfoS("Unable to find cadvisor stats for container", "containerID", containerID)
        } else {                                                                
            p.addCadvisorContainerStats(cs, &caStats)                           
        }                                                                       
        ps.Containers = append(ps.Containers, *cs) 

The containerID above is the container's ID:

(dlv) p containerID
"79fedb3bd23ca77c9d9ccb41909038316f713df22b54b7d942b71d3812e7c74e"

And the memory fetched from caStats is also normal, but its key shows up as "kubepods-burstable-pod36b6d6be73b1065af369b3985edafa09.slice:cri-containerd:79fedb3bd23ca77c9d9ccb41909038316f713df22b54b7d942b71d3812e7c74e", which is where the problem lies.

        "kubepods-burstable-pod36b6d6be73b1065af369b3985edafa09.slice:cri-containerd:79fedb3bd23ca77c9d9ccb41909038316f713df22b54b7d942b71d3812e7c74e": *{
                Spec: (*"k8s.io/kubernetes/vendor/github.com/google/cadvisor/info/v2.ContainerSpec")(0xc002145680),
                Stats: []*k8s.io/kubernetes/vendor/github.com/google/cadvisor/info/v2.ContainerStats len: 2, cap: 2, [
                        *(*"k8s.io/kubernetes/vendor/github.com/google/cadvisor/info/v2.ContainerStats")(0xc000dda1e0),
                        *(*"k8s.io/kubernetes/vendor/github.com/google/cadvisor/info/v2.ContainerStats")(0xc000dda3c0),
                ],}, 

(dlv) p caStats.Stats[0].Memory
*k8s.io/kubernetes/vendor/github.com/google/cadvisor/info/v1.MemoryStats {
        Usage: 58802176,
        MaxUsage: 59199488,
        Cache: 8110080,
        RSS: 48140288,
        Swap: 0,
        MappedFile: 135168,
        WorkingSet: 45826048,
        Failcnt: 0,
       ...

This key is obtained from getCRICadvisorStats, i.e. this patch (https://github.com/kubernetes/kubernetes/commit/545d8985844f6b054319db8545cb9892148e2955), which handles the following two kinds of cgroup paths:

// case0 == cgroupfs: "/kubepods/burstable/pod2fc932ce-fdcc-454b-97bd-aadfdeb4c340/9be25294016e2dc0340dd605ce1f57b492039b267a6a618a7ad2a7a58a740f32"
// case1 == systemd: "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod2fc932ce_fdcc_454b_97bd_aadfdeb4c340.slice/cri-containerd-aaefb9d8feed2d453b543f6d928cede7a4dbefa6a0ae7c9b990dd234c56e93b9.scope"

But it obviously does not handle this kind of path, so the code above throws the error mentioned (Unable to find cadvisor stats for container):

kubepods-burstable-pod36b6d6be73b1065af369b3985edafa09.slice:cri-containerd:79fedb3bd23ca77c9d9ccb41909038316f713df22b54b7d942b71d3812e7c74e
ubuntu@juju-10ee00-case320434-0:~$ ls /sys/fs/cgroup/systemd/system.slice/containerd.service/kubepods-burstable-pod36b6d6be73b1065af369b3985edafa09.slice\:cri-containerd\:79fedb3bd23ca77c9d9ccb41909038316f713df22b54b7d942b71d3812e7c74e/
cgroup.clone_children  cgroup.procs           notify_on_release      tasks

Does this kind of path need to be handled separately? I also found a containerd bug (https://github.com/containerd/containerd/issues/4900) saying that when using runC and wanting the systemd cgroup driver, SystemdCgroup = true must also be added:

# https://github.com/containerd/containerd/blob/18ad79d328ff6dd12cd6be4df97ad4a7fd1918cb/pkg/cri/config/config.go#L213-L216
vim /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
  SystemdCgroup = true

After adding it, newly created containers are fine, so it does not take effect for existing containers (for customer environments see: https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/#update-the-cgroup-driver-on-all-nodes). Also, commands such as 'crictl info' have a bug and cannot show this setting.
At the same time, make sure kubeadm has been configured to use systemd:

$ kubectl describe cm -n kube-system kubelet-config-1.20 |grep cgroupDriver
cgroupDriver: systemd
$ sudo grep -r 'cgroupDriver' /var/lib/kubelet/config.yaml
cgroupDriver: systemd

So this patch (https://github.com/kubernetes/kubernetes/commit/545d8985844f6b054319db8545cb9892148e2955) is enough.
A bug was also filed against the juju charm - https://bugs.launchpad.net/charm-containerd/+bug/1951911

Update 2021-11-12 - The three types of systemd units

systemd units come in three types (service, scope, slice); use the systemd-cgls command to view them (and systemd-cgtop to see actual resource consumption).

  • A service is one or a group of processes created by systemd, started or stopped as a whole;
  • A scope is a process created by an external process via fork but registered with systemd, which wraps it. For example, user sessions, containers and virtual machines are treated as scopes;
  • A slice is a level of the hierarchy that contains services or scopes, which in turn contain processes. By default the system creates 4 slices (-.slice the root slice, system.slice, user.slice, machine.slice).

For example, create a transient cgroup for the top command; top is then wrapped as a service running in the background:

sudo systemd-run --unit=toptest --slice=test top -b
$ sudo systemctl status toptest |grep CGroup -A1
     CGroup: /test.slice/toptest.service
             └─3467203 /usr/bin/top -b
$ sudo cat /sys/fs/cgroup/systemd/test.slice/toptest.service/tasks 
3467203
$ sudo cat /proc/3467203/cgroup 
12:memory:/test.slice/toptest.service
11:rdma:/
10:perf_event:/
9:net_cls,net_prio:/
8:freezer:/
7:hugetlb:/
6:blkio:/test.slice
5:devices:/test.slice
4:cpu,cpuacct:/test.slice
3:pids:/test.slice/toptest.service
2:cpuset:/
1:name=systemd:/test.slice/toptest.service
0::/test.slice/toptest.service

For example, limit toptest.service to CPUShares=600 and a memory cap of 500M; after this, toptest.service appears under both the memory and cpu hierarchies:

sudo systemctl set-property toptest.service CPUShares=600 MemoryLimit=500M
$ sudo cat /proc/3467203/cgroup 
12:memory:/test.slice/toptest.service
11:rdma:/
10:perf_event:/
9:net_cls,net_prio:/
8:freezer:/
7:hugetlb:/
6:blkio:/test.slice
5:devices:/test.slice
4:cpu,cpuacct:/test.slice/toptest.service
3:pids:/test.slice/toptest.service
2:cpuset:/
1:name=systemd:/test.slice/toptest.service
0::/test.slice/toptest.service

The characteristic of a transient cgroup is that it is automatically released once the processes it contains exit.

$ sudo kill -9 3467203
$ sudo cat /sys/fs/cgroup/systemd/test.slice/toptest.service
cat: /sys/fs/cgroup/systemd/test.slice/toptest.service: No such file or directory

In contrast, a persistent cgroup is one whose limits are declared in the [Service] section of a systemd unit file (/lib/systemd/system/xxx.service), as below (they can also be changed directly with systemctl: sudo systemctl set-property cron.service CPUShares=700):

CPUShares=600
MemoryLimit=500M
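
Whether set in the unit file or via set-property, the effective values can be read back with systemctl show, e.g.:

systemctl show cron.service -p CPUShares -p MemoryLimit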

Update 2021-11-12 - How to contribute to the k8s community

See: https://zhhuabj.blog.csdn.net/article/details/9425385

Update 2023-04-25 - Debugging juju

First build binaries with the symbol table (not required; without building you can also debug directly with a command like: /bak/golang/bin/dlv debug ./cmd/juju):

cd /bak/golang/src/github.com/juju/juju
go build -gcflags "all=-N -l" -o juju2 ./cmd/juju
cd /bak/golang/src/github.com/juju/juju
go build -gcflags "all=-N -l" -o jujud ./cmd/jujud

Run jujud with 'dlv exec' on the juju controller, then run 'dlv connect' on node1 (node1 has the source code). To see the source, the 'sources' command is not required, but you must run 'c' before 'l' can show source (of course, set breakpoints before running 'c'; you can also use 'b main.main' to break at the very beginning):

sudo systemctl stop jujud-machine-0.service
sudo /bak/golang/bin/dlv --headless -l 192.168.10.131:1234 exec ./jujud -- machine --data-dir /var/lib/juju --machine-id 0 --debug
cd /bak/golang/src/github.com/juju/juju
sudo /bak/golang/bin/dlv connect 192.168.10.131:1234
(dlv) b main.main
(dlv) c
(dlv) l
(dlv) b github.com/juju/juju/apiserver/facades/client/modelmanager.(*ModelManagerAPI).CreateModel
(dlv) b github.com/juju/juju/api/client/modelmanager.(*Client).CreateModel
(dlv) c

Running the 'juju add-model' command on node1 then drops into the debugger:

cd /bak/golang/src/github.com/juju/juju
#do not use 'sudo', to avoid 'unable to open /tmp/juju-store-lock-3635383939333230: permission denied'
/bak/golang/bin/dlv exec ./juju2 -- add-model test2
# but it is unclear why, with the same method, a breakpoint on ListModels is not triggered by 'juju status', while CreateModel can be triggered
(dlv) b github.com/juju/juju/api/client/modelmanager.(*Client).ListModels
(dlv) b github.com/juju/juju/api/client/modelmanager.(*Client).CreateModel
(dlv) c
(dlv) l

You can also debug interactively together with jujud. For example, on the 'juju add-model' side you can see:

(dlv) args
c = ("*github.com/juju/juju/api/client/modelmanager.Client")(0xc0006673e0)
name = "test2"
owner = "admin"
cloud = "vsphere"
cloudRegion = ""
cloudCredential = github.com/juju/names/v4.CloudCredentialTag {cloud: (*"github.com/juju/names/v4.CloudTag")(0xc00081e440), owner: (*"github.com/juju/names/v4.UserTag")(0xc00081e450), name: "vsphere"}
config = map[string]interface {} [...]
~r0 = github.com/juju/juju/api/base.ModelInfo {Name: "", UUID: "", Type: "",...+13 more}
~r1 = error nil
(dlv) locals
result = github.com/juju/juju/api/base.ModelInfo {Name: "", UUID: "", Type: "",...+13 more}

Then on the jujud side you can see, for example:

(dlv) p credentialValue
github.com/juju/juju/state.Credential {
        cloudCredentialDoc: github.com/juju/juju/state.cloudCredentialDoc {
                DocID: "vsphere#admin#vsphere",
                Owner: "admin",
                Cloud: "vsphere",
                Name: "vsphere",
                Revoked: false,
                AuthType: "userpass",
                Attributes: map[string]string [...],
                Invalid: false,
                InvalidReason: "",},}
(dlv) p credentialValue.Attributes
map[string]string [
        "password": "xxx", 
        "user": "johndoe@test.com", 
]

Let's also debug how a VM is created (juju add-machine):

ubuntu@juju-0c051f-0:~$ sudo ./dlv --headless -l 192.168.10.131:1234 exec ./jujud -- machine --data-dir /var/lib/juju --machine-id 0 --debug
API server listening at: 192.168.10.131:1234
2023-04-27T03:21:40Z warning layer=rpc Listening for remote connections (connections are not authenticated nor encrypted)

hua@node1:/bak/golang/src/github.com/juju/juju$ sudo /bak/golang/bin/dlv connect 192.168.10.131:1234
Type 'help' for list of commands.
(dlv) b github.com/juju/juju/provider/vsphere.(*sessionEnviron).newRawInstance
Breakpoint 1 set at 0x720cf52 for github.com/juju/juju/provider/vsphere.(*sessionEnviron).newRawInstance() ./provider/vsphere/environ_broker.go:118
(dlv) c

hua@node1:/bak/golang/src/github.com/juju/juju$ /bak/golang/bin/dlv exec ./juju2 -- add-machine --series=jammy --constraints "mem=8G cores=2 root-disk=10G"
Type 'help' for list of commands.
(dlv) b github.com/juju/juju/provider/vsphere.(*sessionEnviron).newRawInstance
Breakpoint 1 set at 0x529d9b2 for github.com/juju/juju/provider/vsphere.(*sessionEnviron).newRawInstance() ./provider/vsphere/environ_broker.go:118
(dlv) c
created machine 0
Process 2352606 has exited with status 0


> github.com/juju/juju/provider/vsphere.(*sessionEnviron).newRawInstance() ./provider/vsphere/environ_broker.go:266 (PC: 0x720ef14)
   261:                 VMTemplate:             vmTemplate,
   262:                 ComputeResource:        &availZone.r,
   263:                 ResourcePool:           availZone.pool.Reference(),
   264:         }
   265:
=> 266:         vm, err := env.client.CreateVirtualMachine(env.ctx, createVMArgs)
   267:         if vsphereclient.IsExtendDiskError(err) {
   268:                 // Ensure we don't try to make the same extension across
   269:                 // different resource groups.
   270:                 err = environs.ZoneIndependentError(err)
   271:         }

(dlv) p env.environ.cloud
github.com/juju/juju/environs/cloudspec.CloudSpec {
        Type: "vsphere",
        Name: "vsphere",
        Region: "Datacenter",
        Endpoint: "192.168.10.4",
        IdentityEndpoint: "",
        StorageEndpoint: "",
        Credential: *github.com/juju/juju/cloud.Credential {
                authType: "userpass",
                attributes: map[string]string [...],
                Revoked: false,
                Label: "",
                Invalid: false,
                InvalidReason: "",},
        CACertificates: []string len: 0, cap: 0, nil,
        SkipTLSVerify: false,
        IsControllerCloud: true,}

(dlv) p env.environ.cloud.Credential
*github.com/juju/juju/cloud.Credential {
        authType: "userpass",
        attributes: map[string]string [
                "user": "johndoe@test.com", 
                "password": "xxx", 
        ],
        Revoked: false,
        Label: "",
        Invalid: false,
        InvalidReason: "",}

(dlv) p env.environ.provider.environProviderCredentials
github.com/juju/juju/provider/vsphere.environProviderCredentials {}

