一、安装Prometheus

下载地址:https://prometheus.io/download/
在这里插入图片描述
解压缩到指定目录

root@k8s-master:~/data# ls
prometheus-2.28.0-rc.0.linux-amd64.tar.gz
root@k8s-master:~/data# ls /usr/local
bin  etc  games  include  lib  man  sbin  share  src
root@k8s-master:~/data# tar xf prometheus-2.28.0-rc.0.linux-amd64.tar.gz -C /usr/local
root@k8s-master:~/data# ls /usr/local
bin  etc  games  include  lib  man  prometheus-2.28.0-rc.0.linux-amd64  sbin  share  src
##创建软链接
root@k8s-master:~/data# ln -sv /usr/local/prometheus-2.28.0-rc.0.linux-amd64 /usr/local/prometheus
'/usr/local/prometheus' -> '/usr/local/prometheus-2.28.0-rc.0.linux-amd64'
root@k8s-master:~/data# ls /usr/local/prometheus
console_libraries  consoles  LICENSE  NOTICE  prometheus  prometheus.yml  promtool

prometheus的配置文件

root@k8s-master:~/data# cat /usr/local/prometheus/prometheus.yml 
# my global config
global:
  ##抓取数据的时间间隔,默认是1分钟
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  ##告警规则、记录规则的评估周期
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.

##指标数据采集相关配置
scrape_configs:
  ##作业名称作为标签“job=<job_name>”添加到从此配置中抓取的任何时间序列中。
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']

启动Prometheus

root@k8s-master:/usr/local/prometheus# ls
console_libraries  consoles  LICENSE  NOTICE  prometheus  prometheus.yml  promtool
root@k8s-master:/usr/local/prometheus# ./prometheus
level=info ts=2021-06-22T05:20:47.556Z caller=main.go:388 msg="No time or size retention was set so using the default time retention" duration=15d
level=info ts=2021-06-22T05:20:47.557Z caller=main.go:426 msg="Starting Prometheus" version="(version=2.28.0-rc.0, branch=HEAD, revision=0cf5706ee0a4fda831f2de1f010b94a957555a5b)"
level=info ts=2021-06-22T05:20:47.557Z caller=main.go:431 build_context="(go=go1.16.5, user=root@6a6c7e8a0d2e, date=20210618-11:01:36)"
level=info ts=2021-06-22T05:20:47.557Z caller=main.go:432 host_details="(Linux 4.15.0-144-generic #148-Ubuntu SMP Sat May 8 02:33:43 UTC 2021 x86_64 k8s-master (none))"
level=info ts=2021-06-22T05:20:47.557Z caller=main.go:433 fd_limits="(soft=1024, hard=1048576)"
level=info ts=2021-06-22T05:20:47.557Z caller=main.go:434 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2021-06-22T05:20:47.560Z caller=web.go:541 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2021-06-22T05:20:47.564Z caller=main.go:807 msg="Starting TSDB ..."
ts=2021-06-22T05:20:47.567Z caller=log.go:124 component=web level=info msg="TLS is disabled." http2=false
level=info ts=2021-06-22T05:20:47.574Z caller=head.go:780 component=tsdb msg="Replaying on-disk memory mappable chunks if any"
level=info ts=2021-06-22T05:20:47.575Z caller=head.go:794 component=tsdb msg="On-disk memory mappable chunks replay completed" duration=5.168µs
level=info ts=2021-06-22T05:20:47.575Z caller=head.go:800 component=tsdb msg="Replaying WAL, this may take a while"
level=info ts=2021-06-22T05:20:47.576Z caller=head.go:854 component=tsdb msg="WAL segment loaded" segment=0 maxSegment=0
level=info ts=2021-06-22T05:20:47.576Z caller=head.go:860 component=tsdb msg="WAL replay completed" checkpoint_replay_duration=387.742µs wal_replay_duration=860.496µs total_replay_duration=1.300656ms
level=info ts=2021-06-22T05:20:47.578Z caller=main.go:834 fs_type=EXT4_SUPER_MAGIC
level=info ts=2021-06-22T05:20:47.578Z caller=main.go:837 msg="TSDB started"
level=info ts=2021-06-22T05:20:47.578Z caller=main.go:964 msg="Loading configuration file" filename=prometheus.yml
level=info ts=2021-06-22T05:20:47.579Z caller=main.go:995 msg="Completed loading of configuration file" filename=prometheus.yml totalDuration=996.492µs remote_storage=17.727µs web_handler=568ns query_engine=863ns scrape=367.409µs scrape_sd=112.974µs notify=61.054µs notify_sd=17.23µs rules=2.068µs
level=info ts=2021-06-22T05:20:47.579Z caller=main.go:779 msg="Server is ready to receive web requests."

浏览器访问 http://<Prometheus服务器IP>:9090(Prometheus 默认监听 9090 端口)
在这里插入图片描述
在这里插入图片描述
写一个简单点的表达式:prometheus_http_requests_total
在这里插入图片描述

获取监控数据

在被监控的节点上添加node_exporter

##被监控节点
root@k8s-node1:~# tar xf ./data/node_exporter-1.1.2.linux-amd64.tar.gz -C /usr/local
root@k8s-node1:/usr/local# ln -sv node_exporter-1.1.2.linux-amd64 node_exporter
root@k8s-node1:/usr/local# ls
bin  etc  games  include  lib  man  node_exporter  node_exporter-1.1.2.linux-amd64  sbin  share  src
root@k8s-node1:/usr/local# cd node_exporter
root@k8s-node1:/usr/local/node_exporter# ls
LICENSE  node_exporter  NOTICE
##运行node_exporter
root@k8s-node1:/usr/local/node_exporter# ./node_exporter
level=info ts=2021-06-22T08:36:13.869Z caller=node_exporter.go:178 msg="Starting node_exporter" version="(version=1.1.2, branch=HEAD, revision=b597c1244d7bef49e6f3359c87a56dd7707f6719)"
level=info ts=2021-06-22T08:36:13.869Z caller=node_exporter.go:179 msg="Build context" build_context="(go=go1.15.8, user=root@f07de8ca602a, date=20210305-09:29:10)"
level=warn ts=2021-06-22T08:36:13.869Z caller=node_exporter.go:181 msg="Node Exporter is running as root user. This exporter is designed to run as unpriviledged user, root is not required."
level=info ts=2021-06-22T08:36:13.870Z caller=filesystem_common.go:74 collector=filesystem msg="Parsed flag --collector.filesystem.ignored-mount-points" flag=^/(dev|proc|sys|var/lib/docker/.+)($|/)
level=info ts=2021-06-22T08:36:13.870Z caller=filesystem_common.go:76 collector=filesystem msg="Parsed flag --collector.filesystem.ignored-fs-types" flag=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
level=info ts=2021-06-22T08:36:13.871Z caller=node_exporter.go:106 msg="Enabled collectors"
level=info ts=2021-06-22T08:36:13.871Z caller=node_exporter.go:113 collector=arp
level=info ts=2021-06-22T08:36:13.871Z caller=node_exporter.go:113 collector=bcache

修改server端的Prometheus主配置文件,添加job相关配置信息

root@k8s-master:/usr/local/prometheus# cat prometheus.yml 
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']

###添加下列配置信息  
  # ALL nodes    
  - job_name: 'nodes'
    static_configs:
    - targets:
      - 192.168.145.129:9100

在这里插入图片描述
示例:
写一个PromQL语句
主机CPU在5分钟内的平均使用率(百分比,即 1 减去 idle 空闲占比;注意标签值必须使用英文双引号)。
(1 - avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance)) * 100
在这里插入图片描述
在这里插入图片描述

创建unit file,让systemd 管理prometheus

创建运行prometheus server进程的系统用户,并为其创建家目录/var/lib/prometheus 作为数据存储目录

 useradd -r -m -d /var/lib/prometheus prometheus

创建unit file

root@k8s-master:/usr/local# vim /usr/lib/systemd/system/prometheus.service 

[Unit]
 Description=The Prometheus 2 monitoring system and time series database.
 Documentation=https://prometheus.io
 After=network.target
[Service]
 EnvironmentFile=-/etc/sysconfig/prometheus
 User=prometheus
 ExecStart=/usr/local/prometheus/prometheus \
 --storage.tsdb.path=/var/lib/prometheus/prometheus \
 --config.file=/usr/local/prometheus/prometheus.yml \
 --web.listen-address=0.0.0.0:9090 \
 --web.external-url= $PROM_EXTRA_ARGS
 Restart=on-failure
 StartLimitInterval=1
 RestartSec=3
 [Install]
 WantedBy=multi-user.target

启动服务

systemctl daemon-reload
systemctl start prometheus.service
Logo

K8S/Kubernetes社区为您提供最前沿的新闻资讯和知识内容

更多推荐