一、在k8s中部署prometheus

首先下载prometheus镜像,不然部署时再下载会很慢
我这里用的是v2.2.1版本
[root@master ~]# docker pull prom/prometheus:v2.2.1

二、创建一个存储prometheus数据的目录和sa账号

在你指定的prometheus节点中创建此目录,如果yaml文件中不配置指定节点,那每个节点都需要创建
	[root@master ~]# mkdir /data
	[root@master ~]# chmod 777 /data
创建一个sa账号(sa账号所在的命名空间monitor-sa必须已经存在,如果还没有,先创建命名空间)
	[root@master ~]# kubectl create namespace monitor-sa
	[root@master ~]# kubectl create serviceaccount monitor -n monitor-sa
把sa账号monitor通过clusterrolebinding绑定到clusterrole上
	[root@master ~]# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitor-sa --clusterrole=cluster-admin  --serviceaccount=monitor-sa:monitor

三、编写存储卷、pod、svc这三个yaml文件

1、编写存储卷configMap文件
[root@master ~]# vim prometheus-cfg.yaml
# ConfigMap that carries the Prometheus server configuration file (prometheus.yml)
# in the monitor-sa namespace.
#
# Scrape jobs defined in prometheus.yml:
#   kubernetes-node              - discovers nodes and rewrites a discovered
#                                  "<host>:10250" address to "<host>:9100"
#                                  (presumably the node-exporter port - confirm),
#                                  then copies node labels onto the target.
#   kubernetes-node-cadvisor     - scrapes over HTTPS through kubernetes.default.svc:443
#                                  using the path /api/v1/nodes/<node>/proxy/metrics/cadvisor,
#                                  authenticating with the mounted service-account CA and token.
#   kubernetes-apiserver         - keeps only the endpoint whose namespace/service/port name
#                                  is default/kubernetes/https.
#   kubernetes-service-endpoints - keeps only endpoints whose Service carries the annotation
#                                  prometheus.io/scrape=true; the prometheus.io/scheme,
#                                  prometheus.io/path and prometheus.io/port annotations
#                                  override scheme, metrics path and port respectively.
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: monitor-sa
data:
  # Everything below the "|" is a literal block scalar: it is stored verbatim as the
  # content of prometheus.yml, so it is intentionally left without added comments.
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 10s
      evaluation_interval: 1m
    scrape_configs:
    - job_name: 'kubernetes-node'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    - job_name: 'kubernetes-node-cadvisor'
      kubernetes_sd_configs:
      - role:  node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
    - job_name: 'kubernetes-apiserver'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

配置文件解释
scrape_interval: 15s:收集数据的时间间隔,这里设置为每隔15s收集一次数据(不配置时默认值为1m),scrape_interval可以在全局定义,也可以在单个job中单独定义
scrape_timeout: 10s:采集数据的超时时间,默认为10s
evaluation_interval: 1m:默认为1m,对告警规则做定期计算,然后更新告警状态,evaluation_interval只有全局值


告警状态有如下三种

inactive:没有触发阈值
pending:已触发阈值但未满足告警持续时间
firing:已触发阈值且满足告警持续时间

2、编写prometheus pod文件
[root@master ~]# vim prometheus-deployment.yaml
# Deployment that runs a single Prometheus server pod in the monitor-sa namespace.
# NOTE: YAML comments must start with '#'. A trailing '//...' is NOT a comment; it
# becomes part of the value (or breaks the key) and makes the manifest invalid.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server   # name of the Deployment (controller)
  namespace: monitor-sa     # namespace the Deployment lives in
  labels:
    app: prometheus         # label on the Deployment object
spec:
  replicas: 1               # number of pod replicas
  selector:
    matchLabels:            # must match the pod template labels below
      app: prometheus
      component: server
    #matchExpressions:
    #- {key: app, operator: In, values: [prometheus]}
    #- {key: component, operator: In, values: [server]}
  template:                 # pod template
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'   # whether metrics should be collected from this pod
    spec:
      nodeName: k8s-node            # pin the pod to this node (the node that has /data)
      serviceAccountName: monitor   # service account the pod runs as
      containers:
      - name: prometheus
        image: prom/prometheus:v2.2.1
        # Use the local image when present; pull from the registry only if it is missing.
        imagePullPolicy: IfNotPresent
        command:
          - prometheus
          - --config.file=/etc/prometheus/prometheus.yml
          - --storage.tsdb.path=/prometheus
          - --storage.tsdb.retention=720h
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        # subPath mounts only the single file instead of shadowing /etc/prometheus.
        - mountPath: /etc/prometheus/prometheus.yml
          name: prometheus-config
          subPath: prometheus.yml
        - mountPath: /prometheus/
          name: prometheus-storage-volume
      # Volumes mounted into the container directories listed under volumeMounts above.
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            items:
              - key: prometheus.yml
                path: prometheus.yml
                mode: 0644
        - name: prometheus-storage-volume
          hostPath:
            path: /data
            type: Directory   # the directory must already exist on the node

3、编写service配置文件
[root@master ~]# vim prometheus-svc.yaml
# NodePort Service that exposes the Prometheus server pods (selected by the labels
# app=prometheus, component=server) on port 30000 of every node.
kind: Service
apiVersion: v1
metadata:
  namespace: monitor-sa
  name: prometheus
  labels:
    app: prometheus
spec:
  selector:
    component: server
    app: prometheus
  type: NodePort
  ports:
    - protocol: TCP
      nodePort: 30000    # port opened on each node
      targetPort: 9090   # container port of the Prometheus pod
      port: 9090         # port of the Service inside the cluster

四、创建、查看、验证

1、创建
[root@master prometheus_yaml]# kubectl apply -f prometheus-cfg.yaml 
[root@master prometheus_yaml]# kubectl apply -f prometheus-deployment.yaml 
[root@master prometheus_yaml]# kubectl apply -f prometheus-svc.yaml 

2、查看pod
[root@master prometheus_yaml]# kubectl get po -n monitor-sa 
NAME                                 READY   STATUS    RESTARTS   AGE
node-exporter-78gwm                  1/1     Running   1          29h
node-exporter-859n8                  1/1     Running   1          29h
node-exporter-wljqh                  1/1     Running   1          29h
prometheus-server-7dbc5cd454-kpv8c   1/1     Running   0          68m
查看service
[root@master prometheus_yaml]# kubectl get svc -n monitor-sa 
NAME         TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)          AGE
prometheus   NodePort   10.98.102.185   <none>        9090:30000/TCP   69m
查看configMap
[root@master prometheus_yaml]# kubectl get configmaps -n monitor-sa 
NAME                DATA   AGE
prometheus-config   1      70m

3、验证
访问192.168.241.100:30000
在这里插入图片描述

Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐