alertmanager.yaml

# ConfigMap that carries the Alertmanager configuration file.
# The single data key, alertmanager.yml, is mounted into the Alertmanager pod
# by the ReplicationController that references this ConfigMap by name.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
data:
  # Embedded Alertmanager config. The payload below is literal file content,
  # so it is kept verbatim; English gloss of its inline comments:
  #   resolve_timeout (5m)  - processing/resolve timeout
  #   group_by              - label(s) used to group alerts (here: alertname)
  #   group_wait (10s)      - initial wait before the first notification
  #                           for a new group of alerts is sent
  #   group_interval (10s)  - wait before sending notifications about new
  #                           alerts
  #   repeat_interval (1m)  - period after which a repeated alert is re-sent
  # Every alert is routed to the 'webhook' receiver, which posts to the
  # hard-coded URL http://192.168.187.212:8081/alertmanager/receive.
  alertmanager.yml: |
    global:
      resolve_timeout: 5m  #处理超时时间
    route:
      group_by: ['alertname']  #报警分组依据
      group_wait: 10s  #最初即第一次等待多久时间发送一组警报的通知
      group_interval: 10s  # 在发送新警报前的等待时间
      repeat_interval: 1m  # 发送重复警报的周期
      receiver: 'webhook'
    receivers:
    - name: 'webhook'
      webhook_configs:
      - url: 'http://192.168.187.212:8081/alertmanager/receive'
---
# ReplicationController keeping one Alertmanager pod running.
# The pod reads its configuration from the alertmanager-config ConfigMap,
# mounted as files under /etc/alertmanager.
kind: ReplicationController
apiVersion: v1
metadata:
  name: alertmanager
spec:
  replicas: 1
  selector:
    app: alertmanager
  template:
    metadata:
      labels:
        app: alertmanager
    spec:
      volumes:
        # Projects the ConfigMap keys as files for the container below.
        - name: config-volume
          configMap:
            name: alertmanager-config
      containers:
        - name: alertmanager
          image: prom/alertmanager
          imagePullPolicy: IfNotPresent
          args:
            # Path matches the alertmanager.yml key mounted from the ConfigMap.
            - '--config.file=/etc/alertmanager/alertmanager.yml'
            - '--storage.path=/alertmanager'
          ports:
            - containerPort: 9093
          volumeMounts:
            - name: config-volume
              mountPath: /etc/alertmanager
---
# NodePort Service publishing Alertmanager (pods labelled app=alertmanager)
# on port 9093 inside the cluster and on node port 30093.
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
spec:
  type: NodePort
  selector:
    app: alertmanager
  ports:
    - nodePort: 30093
      port: 9093

node-export.yaml

# DaemonSet that runs a node-exporter pod per node, listening on port 9100.
# Uses apps/v1: the extensions/v1beta1 DaemonSet API is no longer served by
# current Kubernetes releases.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
spec:
  # apps/v1 requires an explicit selector; it must match the template labels.
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      containers:
        - name: node-exporter
          image: prom/node-exporter
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9100
---
# NodePort Service publishing node-exporter (pods labelled app=node-exporter)
# on port 9100 inside the cluster and on node port 30000 of every node.
kind: Service
apiVersion: v1
metadata:
  name: node-exporter
spec:
  type: NodePort
  # Prometheus scrapes each node at <node-address>:30000. With the default
  # traffic policy a NodePort request may be forwarded to a node-exporter pod
  # on a different node, so metrics would be attributed to the wrong instance.
  # "Local" keeps node-port traffic on the pod running on the receiving node.
  externalTrafficPolicy: Local
  ports:
    - port: 9100
      nodePort: 30000
  selector:
    app: node-exporter

prometheus.yaml

# ConfigMap carrying the two files Prometheus reads from /etc/prometheus:
#   rules.yml      - alerting rules
#   prometheus.yml - main configuration (scrape jobs, Alertmanager target)
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
data:
  # Alerting rules. InstanceDown fires when a scrape target reports up == 0
  # (unreachable) continuously for one minute, matching its description.
  rules.yml: |
    groups:
    - name: example
      rules:
      - alert: InstanceDown
        # up is 0 when the last scrape of the target failed.
        expr: up == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."

  # Main Prometheus configuration.
  #   alerting       - sends alerts to 192.168.187.210:30093, the node port
  #                    declared by the alertmanager Service.
  #   scrape_configs - three jobs:
  #     kubernetes-nodes-cadvisor: discovers nodes through the API server at
  #       192.168.187.210:8080 and rewrites the :10250 address to :4194
  #       (presumably the kubelet's cAdvisor port - confirm for your cluster).
  #     kubernetes_node: same discovery, address rewritten to :30000, the
  #       node port declared by the node-exporter Service.
  #     kubernetes-pods: discovers pods and keeps only those whose
  #       prometheus.io/scrape annotation is "true"; the prometheus.io/path
  #       and prometheus.io/port annotations override the metrics path and
  #       port.
  prometheus.yml: |
    global:
      scrape_interval:     5s
      evaluation_interval: 5s
    alerting:
      alertmanagers:
      - static_configs:
        - targets: ["192.168.187.210:30093"]
    rule_files:
      - "/etc/prometheus/rules.yml"
    scrape_configs:
      # Kubernetes monitoring jobs
      - job_name: 'kubernetes-nodes-cadvisor'
        kubernetes_sd_configs:
        - api_server: 'http://192.168.187.210:8080'
          role: node
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - source_labels: [__meta_kubernetes_role]
          action: replace
          target_label: kubernetes_role
        - source_labels: [__address__]
          regex: '(.*):10250'
          replacement: '${1}:4194'
          target_label: __address__
        - source_labels: [__address__]
          action: replace
          target_label: instance
      - job_name: 'kubernetes_node'
        kubernetes_sd_configs:
        - role: node
          api_server: 'http://192.168.187.210:8080'
        relabel_configs:
        - source_labels: [__address__]
          regex: '(.*):10250'
          replacement: '${1}:30000'
          target_label: __address__
        - source_labels: [__address__]
          action: replace
          target_label: instance
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
        - role: pod
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: kubernetes_pod_name
---
# ReplicationController keeping one Prometheus pod running.
# Configuration comes from the prometheus-config ConfigMap (mounted at
# /etc/prometheus); time-series data lives in an emptyDir volume mounted at
# /prometheus.
kind: ReplicationController
apiVersion: v1
metadata:
  name: prometheus
spec:
  replicas: 1
  selector:
    app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      volumes:
        - name: data
          emptyDir: {}
        - name: config-volume
          configMap:
            name: prometheus-config
      containers:
        - name: prometheus
          image: prom/prometheus
          imagePullPolicy: IfNotPresent
          args:
            - '--config.file=/etc/prometheus/prometheus.yml'
            # Data directory sits inside the "data" volume mount below.
            - '--storage.tsdb.path=/prometheus/datas'
            # NOTE(review): newer Prometheus releases spell this flag
            # --storage.tsdb.retention.time; confirm against the image
            # version actually deployed.
            - '--storage.tsdb.retention=1h'
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: data
              mountPath: /prometheus
            - name: config-volume
              mountPath: /etc/prometheus
---
# NodePort Service publishing Prometheus (pods labelled app=prometheus)
# on port 9090 inside the cluster and on node port 30090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
spec:
  type: NodePort
  selector:
    app: prometheus
  ports:
    - nodePort: 30090
      port: 9090
Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐