alertmanager第三方告警插件使用之钉钉告警
本文介绍如何部署第三方告警插件 PrometheusAlert,并配合 Alertmanager 与 Prometheus 实现钉钉告警。
1.安装第三方告警插件
配置文件
[root@k8s-60 aler]# cat app.conf
#---------------------↓全局配置-----------------------
appname = PrometheusAlert
login_user=prometheusalert
#登录密码
login_password=prometheusalert
httpaddr = "0.0.0.0"
#监听端口
httpport = 8080
runmode = dev
#设置代理 proxy = http://123.123.123.123:8080
proxy =
#开启JSON请求
copyrequestbody = true
#告警消息标题
title=PrometheusAlert
#链接到告警平台地址
GraylogAlerturl=http://graylog.org
#钉钉告警 告警logo图标地址
logourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/alert-center.png
#钉钉告警 恢复logo图标地址
rlogourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/alert-center.png
#短信告警级别(等于3就进行短信告警) 告警级别定义 0 信息,1 警告,2 一般严重,3 严重,4 灾难
messagelevel=3
#电话告警级别(等于4就进行语音告警) 告警级别定义 0 信息,1 警告,2 一般严重,3 严重,4 灾难
phonecalllevel=4
#默认拨打号码(页面测试短信和电话功能需要配置此项)
defaultphone=xxxxxxxx
#故障恢复是否启用电话通知0为关闭,1为开启
phonecallresolved=0
#自动告警抑制(自动告警抑制是默认同一个告警源的告警信息只发送告警级别最高的第一条告警信息,其他消息默认屏蔽,这么做的目的是为了减少相同告警来源的消息数量,防止告警炸弹,0为关闭,1为开启)
silent=0
#是否前台输出file or console
logtype=file
#日志文件路径
logpath=logs/prometheusalertcenter.log
#转换Prometheus,graylog告警消息的时区为CST时区(如默认已经是CST时区,请勿开启)
prometheus_cst_time=1
#以上配置是必须要有
#---------------------↓webhook-----------------------
#是否开启钉钉告警通道,可同时开启多个通道,0为关闭,1为开启
open-dingding=1
#默认钉钉机器人地址
ddurl=https://oapi.dingtalk.com/robot/send?access_token=xxxxx
#是否开启 @所有人(0为关闭,1为开启)
dd_isatall=1
#是否开启微信告警通道,可同时开启多个通道,0为关闭,1为开启
open-weixin=1
#默认企业微信机器人地址
wxurl=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxx
容器版
docker run -d -p 88:8080 -v /config/prometheusalert-center:/app/conf --name prometheusalert-center feiyu563/prometheus-alert:latest
k8s版
kubectl create configmap my-alert-conf --from-file=/opt/aler/app.conf
[root@k8s-60 aler]# kubectl get cm | grep my
my-alert-conf 1 45m
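yaml模板(注意:Service 使用的 nodePort 18080 不在 Kubernetes 默认的 30000-32767 范围内,需要先调整 kube-apiserver 的 --service-node-port-range 参数,否则创建 Service 会报错;下文 Alertmanager 使用的 19093 同理)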
cat <<END > feiyu563.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alert
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheusalert
  template:
    metadata:
      labels:
        app: prometheusalert
    spec:
      containers:
      - name: prometheusalert
        image: feiyu563/prometheus-alert:latest
        ports:
        - containerPort: 8080
          name: http
        volumeMounts:
        - name: conf
          mountPath: /app/conf/
          readOnly: true
        - name: date-config
          mountPath: /etc/localtime
      volumes:
      - name: conf
        configMap:
          name: my-alert-conf
      - name: date-config
        hostPath:
          path: /etc/localtime
---
apiVersion: v1
kind: Service
metadata:
  name: alert
spec:
  selector:
    app: prometheusalert
  ports:
  - name: http
    port: 8080
    protocol: TCP
    nodePort: 18080
    targetPort: 8080
  type: NodePort
END
部署完成后,访问 http://<节点IP>:18080 能打开 Web 页面并用 app.conf 中的 login_user / login_password 登录,即说明部署成功
2.安装alertmanager
官网下载软件包
wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
配置
cat alertmanager.yml
global:
  resolve_timeout: 5m
route:
  group_by: ['instance']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1m
  receiver: 'web.hook.prometheusalert'
receivers:
- name: 'web.hook.prometheusalert'
  webhook_configs:
  # 第三方告警插件的地址,就是上面图中的那个 UI 地址,后缀必须是 /prometheus/alert
  - url: 'http://172.16.0.61:18080/prometheus/alert'
  # 在 k8s 中部署时改用 Service 地址:
  #- url: 'http://alert:8080/prometheus/alert'
语法检查
./amtool check-config alertmanager.yml
启动
nohup ./alertmanager --config.file=./alertmanager.yml &
docker容器版
docker run --name alertmanger -d -p 9093:9093 -v /newmoni/alertmanager.yml:/etc/alertmanager/alertmanager.yml -v /etc/localtime:/etc/localtime:ro -v /etc/timezone:/etc/timezone:ro prom/alertmanager:latest
K8S版
kubectl create configmap alertmanager --from-file=/opt/aler/manager.yml
YAML模板
cat <<END > alert-manget.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-alert3
spec:
  replicas: 1
  selector:
    matchLabels:
      app: my-alert32
  template:
    metadata:
      labels:
        app: my-alert32
    spec:
      containers:
      - name: my-alert32
        image: prom/alertmanager:latest
        command:
        - "/bin/alertmanager"
        args:
        - "--config.file=/etc/alertmanager/manager.yml"
        ports:
        - containerPort: 9093
          name: http
        volumeMounts:
        - name: conf
          mountPath: /etc/alertmanager/
          readOnly: true
        - name: dates
          mountPath: /etc/localtime
      volumes:
      - name: conf
        configMap:
          name: alertmanager
      - name: dates
        hostPath:
          path: /etc/localtime
---
apiVersion: v1
kind: Service
metadata:
  name: my-alert3
spec:
  selector:
    app: my-alert32
  ports:
  - name: http
    port: 9093
    protocol: TCP
    nodePort: 19093
    targetPort: 9093
  type: NodePort
END
能够访问下面的 Web UI(http://<节点IP>:19093)即说明部署成功
最后 prometheus服务端
配置文件
[root@docker63 ~]# cat /monit/prometheus1.yml
global:
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['172.16.0.18:9093']
rule_files:
  # 告警规则
  - "/opt/*.yml"
scrape_configs:
  - job_name: 'linux'
    file_sd_configs:
      - files: ['/prometheus/*.yml']
        refresh_interval: 5s
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'win7'
    static_configs:
      - targets: ['172.16.0.8:9182']
  - job_name: 'linus'
    static_configs:
      - targets: ['172.16.0.60:9100','172.16.0.61:9100']
告警规则
[root@docker63 ~]# cat /rule/ru.yml
groups:
- name: linux
  rules:
  - alert: Node-Down
    expr: up == 0
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Node has been down"
      description: "{{ $labels.instance }} has been down"
  - alert: "内存使用率过高"
    expr: round(100- node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes*100) > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "内存使用率过高"
      description: "当前使用率{{ $value }}%"
  - alert: "CPU使用率过高"
    expr: round(100 - ((avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle",instance!~'bac-.*'}[5m]))) *100)) > 80
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "CPU使用率过高"
      description: "当前使用率{{ $value }}%"
  - alert: "磁盘使用率过高"
    expr: round(100-100*(node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"})) > 80
    for: 15s
    labels:
      severity: warning
    annotations:
      summary: "磁盘使用率过高"
      description: "当前磁盘{{$labels.mountpoint}} 使用率{{ $value }}%"
  - alert: "分区容量过低"
    expr: round(node_filesystem_avail_bytes{fstype=~"ext4|xfs",instance!~"testnode",mountpoint!~"/boot.*"}/1024/1024/1024) < 10
    for: 15s
    labels:
      severity: warning
    annotations:
      summary: "分区容量过低"
      description: "当前分区{{$labels.mountpoint}} 容量{{ $value }}GB"
  - alert: "网络流出速率过高"
    expr: round(irate(node_network_transmit_bytes_total{instance!~"data.*",device!~'tap.*|veth.*|br.*|docker.*|vir.*|lo.*|vnet.*'}[1m])/1024) > 2048
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "网络流出速率过高"
      description: "当前速率{{ $value }}KB/s"
最后告警
prometheus容器启动方式
# 说明:-v /rule:/opt 挂载告警的配置文件目录;--storage.tsdb.retention.time=100d 表示数据保留100天
# (注释不能写在行尾的 "\" 之后,否则续行会被打断)
docker run -d \
  -p 9090:9090 --name prometheus \
  -v /monit/prometheus1.yml:/etc/prometheus/prometheus.yml \
  -v /opt/prometheus/data:/prometheus \
  -v /rule:/opt \
  prom/prometheus:latest \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.retention.time=100d \
  --web.enable-lifecycle
node_exporter安装
# 创建用户
groupadd -r prometheus
useradd -r -g prometheus -s /sbin/nologin -M -c "prometheus Daemons" prometheus
编辑/usr/lib/systemd/system/node_exporter.service
cat <<END> /usr/lib/systemd/system/node_exporter.service
[Service]
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/node_exporter
[Install]
WantedBy=multi-user.target
[Unit]
Description=node_exporter
After=network.target
END
#重新加载 systemd 配置后启动
systemctl daemon-reload
systemctl start node_exporter
systemctl enable node_exporter
systemctl status node_exporter.service
更多推荐
已为社区贡献82条内容
所有评论(0)