alertmanager第三方告警插件使用之钉钉告警

摘要：本文介绍如何部署第三方告警插件 PrometheusAlert（配置文件 app.conf），并配合 Alertmanager 与 Prometheus 实现钉钉告警。

野猪佩挤

4521人浏览 · 2020-11-27 14:01:39

野猪佩挤 · 2020-11-27 14:01:39 发布

1.安装第三方告警插件

配置文件

[root@k8s-60 aler]# cat app.conf
#--------------------- Global settings -----------------------
appname = PrometheusAlert
# Login user name
login_user=prometheusalert
# Login password
login_password=prometheusalert
httpaddr = "0.0.0.0"
# Listen port
httpport = 8080
runmode = dev
# Proxy setting, e.g. proxy = http://123.123.123.123:8080
proxy =
# Enable JSON requests
copyrequestbody = true
# Alert message title
title=PrometheusAlert
# Link to the alerting platform
GraylogAlerturl=http://graylog.org
# DingTalk alerts: logo image URL used for firing alerts
logourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/alert-center.png
# DingTalk alerts: logo image URL used for resolved alerts
rlogourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/alert-center.png
# SMS alert level (an SMS is sent when the level equals 3). Levels: 0 info, 1 warning, 2 moderately severe, 3 severe, 4 disaster
messagelevel=3
# Phone alert level (a voice call is made when the level equals 4). Levels: 0 info, 1 warning, 2 moderately severe, 3 severe, 4 disaster
phonecalllevel=4
# Default phone number (required for testing the SMS and phone features from the web page)
defaultphone=xxxxxxxx
# Phone notification when a fault recovers: 0 = off, 1 = on
phonecallresolved=0
# Automatic alert suppression: for a single alert source only the first alert with the highest level is sent
# and the rest are muted, which reduces duplicate messages and prevents alert storms. 0 = off, 1 = on
silent=0
# Log output target: file or console
logtype=file
# Log file path
logpath=logs/prometheusalertcenter.log
# Convert the timestamps of Prometheus/Graylog alert messages to the CST time zone (do not enable if they are already CST)
prometheus_cst_time=1
# All of the settings above are required
#--------------------- webhook -----------------------
# Enable the DingTalk alert channel (several channels may be enabled at once): 0 = off, 1 = on
open-dingding=1
# Default DingTalk robot webhook URL.
# The access_token is a secret: put your own robot's token here and never publish a real one.
ddurl=https://oapi.dingtalk.com/robot/send?access_token=xxxxx
# Whether to @ everyone: 0 = off, 1 = on
# NOTE(review): key name taken from PrometheusAlert's sample app.conf; kept off here because
# the original file did not set it - confirm against the version you deploy.
dd_isatall=0

# Enable the WeChat alert channel (several channels may be enabled at once): 0 = off, 1 = on
open-weixin=1
# Default WeCom (enterprise WeChat) robot webhook URL
wxurl=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxx

容器版

docker run -d -p 88:8080 -v /config/prometheusalert-center:/app/conf --name prometheusalert-center feiyu563/prometheus-alert:latest

k8s版

kubectl create configmap my-alert-conf --from-file=/opt/aler/app.conf
[root@k8s-60 aler]# kubectl get cm | grep my
my-alert-conf   1      45m

yaml模板

cat <<END> feiyu563.yaml 
# Deployment running a single PrometheusAlert replica.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alert
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheusalert
  template:
    metadata:
      labels:
        app: prometheusalert
    spec:
      containers:
      - name: prometheusalert
        image: feiyu563/prometheus-alert:latest 
        ports:
        - containerPort: 8080
          name: http
        volumeMounts:
          # app.conf from the my-alert-conf ConfigMap, mounted read-only
          - name: conf
            mountPath: /app/conf/
            readOnly: true
          # Host's /etc/localtime, so the container uses the node's time zone
          - name: date-config
            mountPath: /etc/localtime
      volumes:
      - name: conf
        configMap:
          name: my-alert-conf
      - name: date-config
        hostPath:
          path: /etc/localtime
---
# Service exposing the pods labelled app=prometheusalert on a node port.
apiVersion: v1
kind: Service
metadata:
  name: alert
spec:
  selector:
      app: prometheusalert
  ports:
    - name: http
      port: 8080
      protocol: TCP
      # NOTE(review): 18080 is outside Kubernetes' default NodePort range (30000-32767);
      # this presumably relies on a widened --service-node-port-range on the API server - confirm.
      nodePort: 18080
      targetPort: 8080
  type: NodePort
END

能够登录 Web 页面并测试通过，即说明部署成功。
在这里插入图片描述

2.安装alertmanager

官网下载软件包

wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz

配置

cat alertmanager.yml 

# Alertmanager configuration: every alert is routed to the PrometheusAlert webhook.
global:
  resolve_timeout: 5m

route:
  group_by: ['instance']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: 'web.hook.prometheusalert'
receivers:
- name: 'web.hook.prometheusalert'
  webhook_configs:
  # Address of the third-party alert plugin (the PrometheusAlert web UI address);
  # the path must end with /prometheus/alert.
  - url: 'http://172.16.0.61:18080/prometheus/alert'
  # Use this one instead when running inside k8s:
  #- url: 'http://alert:8080/prometheus/alert'

语法检查

 ./amtool check-config alertmanager.yml

启动

nohup ./alertmanager --config.file=./alertmanager.yml &

docker容器版

docker run --name alertmanger -d  -p 9093:9093  -v /newmoni/alertmanager.yml:/etc/alertmanager/alertmanager.yml -v /etc/localtime:/etc/localtime:ro -v /etc/timezone:/etc/timezone:ro prom/alertmanager:latest

K8S版

# The ConfigMap must be named "alertmanager": that is the name the Deployment's
# "conf" volume references, and the pod cannot start if it does not exist.
kubectl create configmap alertmanager --from-file=/opt/aler/manager.yml

YAML模板

cat <<END>alert-manget.yaml
# Deployment running a single Alertmanager replica.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-alert3
spec:
  replicas: 1
  selector:
    matchLabels:
      app: my-alert32
  template:
    metadata:
      labels:
        app: my-alert32
    spec:
      containers:
      - name: my-alert32
        image: prom/alertmanager:latest 
        command: 
        - "/bin/alertmanager"
        args: 
        # The config file is read from the ConfigMap mounted at /etc/alertmanager/
        - "--config.file=/etc/alertmanager/manager.yml"
        ports:
        - containerPort: 9093
          name: http
        volumeMounts:
          - name: conf
            mountPath: /etc/alertmanager/
            readOnly: true
          # Host's /etc/localtime, so the container uses the node's time zone
          - name: dates
            mountPath: /etc/localtime
      volumes:
      - name: conf
        configMap:
          name: alertmanager
      - name: dates
        hostPath:
          path: /etc/localtime
---
# Service exposing the pods labelled app=my-alert32 on a node port.
apiVersion: v1
kind: Service
metadata:
  name: my-alert3
spec:
  selector:
      app: my-alert32
  ports:
    - name: http
      port: 9093
      protocol: TCP
      # NOTE(review): 19093 is outside Kubernetes' default NodePort range (30000-32767);
      # this presumably relies on a widened --service-node-port-range on the API server - confirm.
      nodePort: 19093
      targetPort: 9093
  type: NodePort
END

能够访问下面的 Web UI，即说明部署成功。
在这里插入图片描述

最后 prometheus服务端

配置文件

[root@docker63 ~]# cat /monit/prometheus1.yml 
# Prometheus server configuration.
global:
# Alertmanager instance that receives fired alerts
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['172.16.0.18:9093']
rule_files:
  # Alerting rule files
  - "/opt/*.yml"
scrape_configs:
  # Targets discovered from files, re-read every 5 seconds
  - job_name: 'linux'
    file_sd_configs:
      - files: ['/prometheus/*.yml']
        refresh_interval: 5s

  - job_name: 'prometheus'
    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'win7'
    static_configs:
    - targets: ['172.16.0.8:9182']

  - job_name: 'linus'
    static_configs:
    - targets: ['172.16.0.60:9100','172.16.0.61:9100']

告警规则

[root@docker63 ~]# cat /rule/ru.yml
# Alerting rules. "rules" must be nested inside its group entry, otherwise
# Prometheus rejects the file.
groups:
- name: linux
  rules:
  # Target has been unreachable for one minute
  - alert: Node-Down
    expr: up == 0
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Node has been down"
      description: "has been down "

  # Memory usage above 80%
  - alert: "内存使用率过高"
    expr: round(100- node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes*100) > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "内存使用率过高"
      description: "当前使用率{{ $value }}%"

  # CPU usage above 80%
  - alert: "CPU使用率过高"
    expr: round(100 - ((avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle",instance!~'bac-.*'}[5m]))) *100)) > 80
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "CPU使用率过高"
      description: "当前使用率{{ $value }}%"

  # Filesystem usage above 80% (ext4/xfs only)
  - alert: "磁盘使用率过高"
    expr: round(100-100*(node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"})) > 80
    for: 15s
    labels:
      severity: warning
    annotations:
      summary: "磁盘使用率过高"
      description: "当前磁盘{{$labels.mountpoint}} 使用率{{ $value }}%"

  # Less than 10 GB available on a partition
  - alert: "分区容量过低"
    expr: round(node_filesystem_avail_bytes{fstype=~"ext4|xfs",instance!~"testnode",mountpoint!~"/boot.*"}/1024/1024/1024) < 10
    for: 15s
    labels:
      severity: warning
    annotations:
      summary: "分区容量过低"
      description: "当前分区{{$labels.mountpoint}} 容量{{ $value }}GB"

  # Outbound network rate above 2048 KB/s. The alert is about outbound traffic,
  # so it uses the transmit counter (the receive counter measures inbound traffic).
  - alert: "网络流出速率过高"
    expr: round(irate(node_network_transmit_bytes_total{instance!~"data.*",device!~'tap.*|veth.*|br.*|docker.*|vir.*|lo.*|vnet.*'}[1m])/1024) > 2048
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "网络流出速率过高"
      description: "当前速率{{ $value }}KB/s"

最后告警

在这里插入图片描述

prometheus容器启动方式

# Start Prometheus in a container.
#   -v /rule:/opt                        directory holding the alerting rule files
#   --storage.tsdb.retention.time=100d   keep data for 100 days
# The notes live up here because a comment placed after a trailing backslash
# ("\  #...") breaks the line continuation and cuts the command short.
docker run -d \
  -p 9090:9090 --name prometheus \
  -v /monit/prometheus1.yml:/etc/prometheus/prometheus.yml \
  -v /opt/prometheus/data:/prometheus \
  -v /rule:/opt \
  prom/prometheus:latest \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.retention.time=100d \
  --web.enable-lifecycle

node_exporter安装

# 创建用户
groupadd -r prometheus
useradd -r -g prometheus -s /sbin/nologin -M -c "prometheus Daemons" prometheus

编辑/usr/lib/systemd/system/node_exporter.service

# Write the systemd unit for node_exporter. The heredoc delimiter is quoted so
# the body is written literally, with no shell expansion.
cat <<'END' > /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
After=network.target

[Service]
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target
END

# Make systemd re-read its unit files so the new unit is known, then start the
# service, enable it at boot and show its status.
systemctl daemon-reload
systemctl start node_exporter
systemctl enable node_exporter
systemctl status node_exporter.service

K8S/Kubernetes

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐

【深度】阿里巴巴万级规模 K8s 集群全局高可用体系之美

作者 | 韩堂、柘远、沉醉来源 | 阿里巴巴云原生公众号前言台湾作家林清玄在接受记者采访的时候，如此评价自己 30 多年写作生涯：“第一个十年我才华横溢，‘贼光闪现’，令周边黯然失色；第二个十年，我终于‘宝光现形’，不再去抢风头，反而与身边的美丽相得益彰；进入第三个十年，繁华落尽见真醇，我进入了‘醇光初现’的阶段，真正体味到了境界之美”。长夜有穷，真水无香。领略过了 K8s“身在江

K8S/Kubernetes

如何基于 K8s 构建下一代 DevOps 平台？

作者 | 孙健波（天元）导读：当前云原生 DevOps 体系现状如何？面临哪些挑战？如何通过 OAM 解决云原生 DevOps 场景下的诸多问题？云原生开发应用模型 OAM(Open Application Model) 社区核心成员孙健波将为大家一一解答，并分享如何基于 OAM 和 Kubernetes 打造无限能力的下一代 DevOps 平台。什么是 DevOps？为什么基于 Kub