Prometheus Deployment and Application (1)

Standalone deployment of Prometheus

https://prometheus.io/download/
https://github.com/prometheus/prometheus
[root@ubuntu2004 data]#ls
learning-prometheus  node_exporter-1.4.0.linux-amd64.tar.gz  prometheus-2.40.2.linux-amd64.tar.gz
[root@ubuntu2004 data]#tar xf prometheus-2.40.2.linux-amd64.tar.gz  -C /usr/local/

[root@ubuntu2004 local]#ln -sv  prometheus-2.40.2.linux-amd64 prometheus
'prometheus' -> 'prometheus-2.40.2.linux-amd64'
[root@ubuntu2004 local]#ll
total 44
drwxr-xr-x 11 root root 4096 Nov 22 09:45 ./
drwxr-xr-x 14 root root 4096 Feb 23  2022 ../
drwxr-xr-x  2 root root 4096 Feb 23  2022 bin/
drwxr-xr-x  2 root root 4096 Feb 23  2022 etc/
drwxr-xr-x  2 root root 4096 Feb 23  2022 games/
drwxr-xr-x  2 root root 4096 Feb 23  2022 include/
drwxr-xr-x  3 root root 4096 Feb 23  2022 lib/
lrwxrwxrwx  1 root root    9 Feb 23  2022 man -> share/man/
lrwxrwxrwx  1 root root   29 Nov 22 09:45 prometheus -> prometheus-2.40.2.linux-amd64/
drwxr-xr-x  4 1001  121 4096 Nov 17 22:06 prometheus-2.40.2.linux-amd64/
drwxr-xr-x  2 root root 4096 Feb 23  2022 sbin/
drwxr-xr-x  4 root root 4096 Feb 23  2022 share/
drwxr-xr-x  2 root root 4096 Feb 23  2022 src/

[root@ubuntu2004 prometheus]#useradd -s /sbin/nologin  prometheus

[root@ubuntu2004 prometheus]#ls
console_libraries  consoles  LICENSE  NOTICE  prometheus  prometheus.service  prometheus.yml  prometheus.yml.bak  promtool
[root@ubuntu2004 prometheus]#./prometheus 
ts=2022-11-22T01:57:04.650Z caller=main.go:512 level=info msg="No time or size retention was set so using the default time retention" duration=15d
ts=2022-11-22T01:57:04.651Z caller=main.go:556 level=info msg="Starting Prometheus Server" mode=server version="(version=2.40.2, branch=HEAD, revision=a07a94a5abb8a979d8aa87297f77f3979148b2da)"
ts=2022-11-22T01:57:04.652Z caller=main.go:561 level=info build_context="(go=go1.19.3, user=root@1b4b53e3f125, date=20221117-13:40:12)"
ts=2022-11-22T01:57:04.653Z caller=main.go:562 level=info host_details="(Linux 5.4.0-124-generic #140-Ubuntu SMP Thu Aug 4 02:23:37 UTC 2022 x86_64 ubuntu2004 (none))"
ts=2022-11-22T01:57:04.653Z caller=main.go:563 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-11-22T01:57:04.654Z caller=main.go:564 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-11-22T01:57:04.657Z caller=web.go:559 level=info component=web msg="Start listening for connections" address=0.0.0.0:9090
ts=2022-11-22T01:57:04.659Z caller=main.go:993 level=info msg="Starting TSDB ..."
ts=2022-11-22T01:57:04.660Z caller=tls_config.go:232 level=info component=web msg="Listening on" address=[::]:9090
ts=2022-11-22T01:57:04.660Z caller=tls_config.go:235 level=info component=web msg="TLS is disabled." http2=false address=[::]:9090
ts=2022-11-22T01:57:04.662Z caller=head.go:562 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-11-22T01:57:04.662Z caller=head.go:606 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=2.586µs
ts=2022-11-22T01:57:04.662Z caller=head.go:612 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-11-22T01:57:04.663Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=0
ts=2022-11-22T01:57:04.663Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=14.855µs wal_replay_duration=1.386087ms wbl_replay_duration=258ns total_replay_duration=1.426833ms
ts=2022-11-22T01:57:04.665Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
ts=2022-11-22T01:57:04.665Z caller=main.go:1017 level=info msg="TSDB started"
ts=2022-11-22T01:57:04.666Z caller=main.go:1197 level=info msg="Loading configuration file" filename=prometheus.yml
ts=2022-11-22T01:57:04.666Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=prometheus.yml totalDuration=503.234µs db_storage=899ns remote_storage=1.304µs web_handler=349ns query_engine=533ns scrape=161.395µs scrape_sd=22.788µs notify=18.815µs notify_sd=4.213µs rules=1.097µs tracing=4.86µs
ts=2022-11-22T01:57:04.666Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
ts=2022-11-22T01:57:04.666Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
....

[root@ubuntu2004 prometheus]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                       Peer Address:Port                   Process                                                       
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                              0.0.0.0:*                       users:(("systemd-resolve",pid=736,fd=13))                    
LISTEN                    0                         128                                                0.0.0.0:22                                              0.0.0.0:*                       users:(("sshd",pid=770,fd=3))                                
LISTEN                    0                         128                                                   [::]:22                                                 [::]:*                       users:(("sshd",pid=770,fd=4))                                
LISTEN                    0                         4096                                                     *:9090                                                  *:*                       users:(("prometheus",pid=2640,fd=7))                         
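
With the server running, a couple of quick checks against its HTTP API confirm it is healthy and already scraping itself (a minimal sanity check, assuming the default listen address of localhost:9090):

# liveness endpoint; returns HTTP 200 with a short health message
curl -s http://localhost:9090/-/healthy
# instant query via the HTTP API; the self-scrape target should report up == 1
curl -s 'http://localhost:9090/api/v1/query?query=up'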


# systemd unit file: manage the service with systemctl
[root@ubuntu2004 prometheus]#cat prometheus.service
[Unit]
Description=Monitoring system and time series database
Documentation=https://prometheus.io/docs/introduction/overview/

[Service]
Restart=always
User=prometheus
EnvironmentFile=-/etc/default/prometheus
ExecStart=/usr/local/prometheus/prometheus \
            --config.file=/usr/local/prometheus/prometheus.yml \
            --storage.tsdb.path=/usr/local/prometheus/data \
            --web.console.libraries=/usr/share/prometheus/console_libraries \
            --web.enable-lifecycle \
            $ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
SendSIGKILL=no
LimitNOFILE=8192

[Install]
WantedBy=multi-user.target
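
The listing earlier shows a prometheus.service shipped next to the binary; before the `systemctl status` below can work, the unit has to be installed and the data directory handed to the prometheus user. A minimal sketch of the missing steps, assuming the paths used throughout this section:

cp /usr/local/prometheus/prometheus.service /lib/systemd/system/
mkdir -p /usr/local/prometheus/data
chown -R prometheus. /usr/local/prometheus/
systemctl daemon-reload
systemctl start prometheus.service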

[root@ubuntu2004 prometheus]#systemctl status prometheus.service 
● prometheus.service - Monitoring system and time series database
     Loaded: loaded (/lib/systemd/system/prometheus.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 10:01:05 CST; 3s ago
       Docs: https://prometheus.io/docs/introduction/overview/
   Main PID: 43386 (prometheus)
      Tasks: 8 (limit: 2236)
     Memory: 53.0M
     CGroup: /system.slice/prometheus.service
             └─43386 /usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/data --web.console.libraries=/usr/share/prometheus/console_libraries --web.enable-lifecycle

Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.887Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=5 maxSegment=6
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.887Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=6 maxSegment=6
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.888Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=4.266669ms wal_replay_duration=104.568886ms wbl_replay_duration=248ns total_r>
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1017 level=info msg="TSDB started"
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1197 level=info msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml totalDuration=2.076078ms db_storage=2.036µs remote_st>
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.894Z caller=consul.go:293 level=error component="discovery manager scrape" discovery=consul msg="Error retrieving datacenter name" err="Get \"http://localhost:8500/v1/agent/self\": >

Adding node_exporter

[root@ubuntu2004 prometheus]#./prometheus --web.enable-lifecycle
ts=2022-11-22T02:34:50.388Z caller=main.go:512 level=info msg="No time or size retention was set so using the default time retention" duration=15d
ts=2022-11-22T02:34:50.388Z caller=main.go:556 level=info msg="Starting Prometheus Server" mode=server version="(version=2.40.2, branch=HEAD, revision=a07a94a5abb8a979d8aa87297f77f3979148b2da)"
ts=2022-11-22T02:34:50.388Z caller=main.go:561 level=info build_context="(go=go1.19.3, user=root@1b4b53e3f125, date=20221117-13:40:12)"
ts=2022-11-22T02:34:50.388Z caller=main.go:562 level=info host_details="(Linux 5.4.0-124-generic #140-Ubuntu SMP Thu Aug 4 02:23:37 UTC 2022 x86_64 ubuntu2004 (none))"
ts=2022-11-22T02:34:50.388Z caller=main.go:563 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-11-22T02:34:50.388Z caller=main.go:564 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-11-22T02:34:50.390Z caller=web.go:559 level=info component=web msg="Start listening for connections" address=0.0.0.0:9090
ts=2022-11-22T02:34:50.390Z caller=main.go:993 level=info msg="Starting TSDB ..."
ts=2022-11-22T02:34:50.395Z caller=tls_config.go:232 level=info component=web msg="Listening on" address=[::]:9090
ts=2022-11-22T02:34:50.395Z caller=tls_config.go:235 level=info component=web msg="TLS is disabled." http2=false address=[::]:9090
ts=2022-11-22T02:34:50.396Z caller=head.go:562 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-11-22T02:34:50.397Z caller=head.go:606 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=719.294µs
ts=2022-11-22T02:34:50.397Z caller=head.go:612 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-11-22T02:34:50.401Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=3
ts=2022-11-22T02:34:50.401Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=1 maxSegment=3
ts=2022-11-22T02:34:50.412Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=2 maxSegment=3
ts=2022-11-22T02:34:50.413Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=3 maxSegment=3
ts=2022-11-22T02:34:50.413Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=120.616µs wal_replay_duration=16.009663ms wbl_replay_duration=124ns total_replay_duration=16.941701ms
ts=2022-11-22T02:34:50.416Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
ts=2022-11-22T02:34:50.416Z caller=main.go:1017 level=info msg="TSDB started"
ts=2022-11-22T02:34:50.417Z caller=main.go:1197 level=info msg="Loading configuration file" filename=prometheus.yml
ts=2022-11-22T02:34:50.418Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=prometheus.yml totalDuration=1.060226ms db_storage=2.109µs remote_storage=2.185µs web_handler=590ns query_engine=995ns scrape=273.086µs scrape_sd=64.143µs notify=36.073µs notify_sd=24.71µs rules=2.353µs tracing=28.653µs
ts=2022-11-22T02:34:50.419Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
ts=2022-11-22T02:34:50.419Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
....

[root@ubuntu2004 data]#ls
learning-prometheus  node_exporter-1.4.0.linux-amd64.tar.gz  prometheus-2.40.2.linux-amd64.tar.gz
[root@ubuntu2004 data]#tar xf node_exporter-1.4.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 data]#cd /usr/local/
[root@ubuntu2004 local]#ln -sv node_exporter-1.4.0.linux-amd64/ node_exporter
'node_exporter' -> 'node_exporter-1.4.0.linux-amd64/'

[root@ubuntu2004 local]#cd node_exporter
[root@ubuntu2004 node_exporter]#ls
LICENSE  node_exporter  NOTICE

[root@ubuntu2004 prometheus]#vim prometheus.yml
...
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    static_configs:
      - targets:
          - "10.0.0.210:9100"
          - "10.0.0.209:9100"
          - "10.0.0.208:9100"

[root@ubuntu2004 node_exporter]#./node_exporter --collector.ntp --collector.tcpstat --no-collector.zfs
ts=2022-11-22T02:29:14.368Z caller=node_exporter.go:182 level=info msg="Starting node_exporter" version="(version=1.4.0, branch=HEAD, revision=7da1321761b3b8dfc9e496e1a60e6a476fec6018)"
ts=2022-11-22T02:29:14.368Z caller=node_exporter.go:183 level=info msg="Build context" build_context="(go.....
.....
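
Once node_exporter is running, its metrics endpoint can be spot-checked directly before pointing Prometheus at it (assuming the default :9100 listen address):

# a few CPU counters are enough to confirm the collectors are exporting
curl -s localhost:9100/metrics | grep '^node_cpu_seconds_total' | head -n 3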

# Reload the configuration online; note that the server must be started with --web.enable-lifecycle
[root@ubuntu2004 ~]#curl -XPOST http://localhost:9090/-/reload

[root@ubuntu2004 ~]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                       Peer Address:Port                   Process                                                       
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                              0.0.0.0:*                       users:(("systemd-resolve",pid=735,fd=13))                    
LISTEN                    0                         128                                                0.0.0.0:22                                              0.0.0.0:*                       users:(("sshd",pid=769,fd=3))                                
LISTEN                    0                         4096                                                     *:9100                                                  *:*                       users:(("node_exporter",pid=1687,fd=3))                      
LISTEN                    0                         128                                                   [::]:22                                                 [::]:*                       users:(("sshd",pid=769,fd=4))                                
LISTEN                    0                         4096                                                     *:9090                                                  *:*                       users:(("prometheus",pid=1326,fd=3))                         


# systemd unit file
[root@ubuntu2004 prometheus]#vim /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Type=simple
User=prometheus
ExecStart=/usr/local/node_exporter/node_exporter \
  --collector.ntp \
  --collector.mountstats \
  --collector.systemd \
  --collector.ethtool \
  --collector.tcpstat
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=multi-user.target

[root@ubuntu2004 prometheus]#chown -R prometheus. /usr/local/node_exporter 

[root@ubuntu2004 prometheus]#systemctl daemon-reload 
[root@ubuntu2004 prometheus]#systemctl start node_exporter.service 
[root@ubuntu2004 prometheus]#systemctl status node_exporter.service 
● node_exporter.service - node_exporter
     Loaded: loaded (/lib/systemd/system/node_exporter.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 10:11:44 CST; 3s ago
       Docs: https://prometheus.io/docs/introduction/overview/
   Main PID: 44033 (node_exporter)
      Tasks: 5 (limit: 2236)
     Memory: 13.5M
     CGroup: /system.slice/node_exporter.service
             └─44033 /usr/local/node_exporter/node_exporter --collector.ntp --collector.mountstats --collector.systemd --collector.ethtool --collector.tcpstat

Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=thermal_zone
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=time
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=timex
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=udp_queues
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=uname
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=vmstat
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=xfs
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=zfs
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:199 level=info msg="Listening on" address=:9100
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.928Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false

Service discovery

File-based service discovery

[root@ubuntu2004 prometheus]#mkdir targets
[root@ubuntu2004 prometheus]#cd targets/
[root@ubuntu2004 targets]#vim nodes-linux.yml
- targets:
    - 10.0.0.208:9100
    - 10.0.0.209:9100
    - 10.0.0.210:9100
  labels:
    os: ubuntu

[root@ubuntu2004 targets]#cd ..
[root@ubuntu2004 prometheus]#vim prometheus.yml
....
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    file_sd_configs:
      - files:
          - targets/nodes-*.yml
        refresh_interval: 2m

[root@ubuntu2004 prometheus]#curl -XPOST http://localhost:9090/-/reload
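
After the reload, the discovered targets can be inspected through the API. Note that once this job is in place, later edits to targets/nodes-*.yml are picked up automatically within refresh_interval, with no further reload needed. A quick look (jq assumed to be installed):

# one entry per active target: job, instance and scrape health
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, instance: .labels.instance, health: .health}'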

Consul service discovery

https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
https://developer.hashicorp.com/consul/downloads
[root@ubuntu2004 data]#unzip consul_1.14.1_linux_amd64.zip -d /usr/local/bin/
Archive:  consul_1.14.1_linux_amd64.zip
  inflating: /usr/local/bin/consul   

[root@ubuntu2004 data]#mkdir -pv /consul/data/
mkdir: created directory '/consul'
mkdir: created directory '/consul/data/'
[root@ubuntu2004 data]#mkdir /etc/console/
[root@ubuntu2004 data]#consul agent -dev -ui -data-dir=/consul/data/ -config-dir=/etc/console/ -client=0.0.0.0
==> Starting Consul agent...
              Version: '1.14.1'
           Build Date: '2022-11-21 16:56:07 +0000 UTC'
              Node ID: '499b3393-c5f2-cc1b-face-31e46352e0ce'
            Node name: 'ubuntu2004'
           Datacenter: 'dc1' (Segment: '<all>')
               Server: true (Bootstrap: false)
          Client Addr: [0.0.0.0] (HTTP: 8500, HTTPS: -1, gRPC: 8502, gRPC-TLS: 8503, DNS: 8600)
         Cluster Addr: 127.0.0.1 (LAN: 8301, WAN: 8302)
    Gossip Encryption: false
     Auto-Encrypt-TLS: false
            HTTPS TLS: Verify Incoming: false, Verify Outgoing: false, Min Version: TLSv1_2
             gRPC TLS: Verify Incoming: false, Min Version: TLSv1_2
     Internal RPC TLS: Verify Incoming: false, Verify Outgoing: false (Verify Hostname: false), Min Version: TLSv1_2

==> Log data will now stream in as it occurs:
....

[root@ubuntu2004 ~]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                        Peer Address:Port                    Process                                                     
LISTEN                    0                         4096                                             127.0.0.1:8300                                             0.0.0.0:*                        users:(("consul",pid=3153,fd=6))                           
LISTEN                    0                         4096                                             127.0.0.1:8301                                             0.0.0.0:*                        users:(("consul",pid=3153,fd=9))                           
LISTEN                    0                         4096                                             127.0.0.1:8302                                             0.0.0.0:*                        users:(("consul",pid=3153,fd=7))                           
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                               0.0.0.0:*                        users:(("systemd-resolve",pid=735,fd=13))                  
LISTEN                    0                         128                                                0.0.0.0:22                                               0.0.0.0:*                        users:(("sshd",pid=769,fd=3))                              
LISTEN                    0                         4096                                                     *:9100                                                   *:*                        users:(("node_exporter",pid=1687,fd=3))                    
LISTEN                    0                         4096                                                     *:8500                                                   *:*                        users:(("consul",pid=3153,fd=17))                          
LISTEN                    0                         4096                                                     *:8502                                                   *:*                        users:(("consul",pid=3153,fd=18))                          
LISTEN                    0                         128                                                   [::]:22                                                  [::]:*                        users:(("sshd",pid=769,fd=4))                              
LISTEN                    0                         4096                                                     *:8503                                                   *:*                        users:(("consul",pid=3153,fd=19))                          
LISTEN                    0                         4096                                                     *:8600                                                   *:*                        users:(("consul",pid=3153,fd=16))                          
LISTEN                    0                         4096                                                     *:9090                                                   *:*                        users:(("prometheus",pid=1945,fd=7))                       
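
With the dev-mode agent up, consul's own CLI and HTTP API confirm the single-node cluster is working (assuming the default client address):

# the local agent should show up as alive
consul members
# returns the current leader address, e.g. "127.0.0.1:8300"
curl -s localhost:8500/v1/status/leader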


# systemd unit file
[root@ubuntu2004 prometheus]#vim /usr/lib/systemd/system/consul.service
[Unit]
Description="HashiCorp Consul - A service mesh solution"
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target

[Service]
EnvironmentFile=-/etc/consul.d/consul.env
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -dev -bootstrap \
            -config-dir /usr/local/consul/config \
            -data-dir /usr/local/consul/data \
            -ui \
            -log-level INFO \
            -bind 127.0.0.1 \
            -client 0.0.0.0
ExecReload=/bin/kill --signal HUP $MAINPID
KillMode=process
KillSignal=SIGTERM
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target

[root@ubuntu2004 prometheus]#useradd -s /sbin/nologin consul
[root@ubuntu2004 prometheus]#systemctl daemon-reload 

[root@ubuntu2004 prometheus]#mkdir /usr/local/consul/config -p
[root@ubuntu2004 prometheus]#mkdir /usr/local/consul/data
[root@ubuntu2004 prometheus]#chown -R consul. /usr/local/consul/

[root@ubuntu2004 prometheus]#mv /usr/local/bin/consul /usr/bin/
[root@ubuntu2004 prometheus]#systemctl start consul.service 
[root@ubuntu2004 prometheus]#systemctl status consul.service 
● consul.service - "HashiCorp Consul - A service mesh solution"
     Loaded: loaded (/lib/systemd/system/consul.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 10:08:05 CST; 1s ago
       Docs: https://www.consul.io/
   Main PID: 43826 (consul)
      Tasks: 8 (limit: 2236)
     Memory: 89.5M
     CGroup: /system.slice/consul.service
             └─43826 /usr/bin/consul agent -dev -bootstrap -config-dir /usr/local/consul/config -data-dir /usr/local/consul/data -ui -log-level INFO -bind 127.0.0.1 -client 0.0.0.0

Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.735+0800 [INFO]  agent.leader: started routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.736+0800 [INFO]  agent.server: member joined, marking health alive: member=ubuntu2004 partition=default
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.738+0800 [INFO]  agent.leader: stopping routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.738+0800 [INFO]  agent.leader: stopped routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO]  agent: Synced node info
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO]  agent: Synced service: service=node_exporter-node03
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO]  agent: Synced service: service=node_exporter-node01
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.929+0800 [INFO]  agent: Synced service: service=node_exporter-node02
Nov 24 10:08:06 ubuntu2004 consul[43826]: 2022-11-24T10:08:06.021+0800 [INFO]  agent.server: federation state anti-entropy synced
Nov 24 10:08:06 ubuntu2004 consul[43826]: 2022-11-24T10:08:06.149+0800 [WARN]  agent: Check is now critical: check=service:node_exporter-node02
[root@ubuntu2004 data]#vim /etc/console/node.json
{
  "services": [{
     "id": "node_exporter-node01",
     "name": "node01",
     "address": "10.0.0.210",
     "port": 9100,
     "tags": ["nodes"],
     "checks": [{
       "http": "http://10.0.0.210:9100/metrics",
       "interval": "5s"
     }]
  },
  {
     "ID": "node_exporter-node02",
     "Name": "node02",
     "Address": "10.0.0.209",
     "Port": 9100,
     "Tags": ["nodes"],
     "Checks": [{
        "http": "http://10.0.0.209:9100/metrics",
        "interval": "5s"
     }]
  },
  {
     "ID": "node_exporter-node03",
     "Name": "node03",
     "Address": "10.0.0.208",
     "Port": 9100,
     "Tags": ["nodes"],
     "Checks": [{
        "http": "http://10.0.0.208:9100/metrics",
        "interval": "5s"
     }]
  }]
}
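
Because these definitions live in the agent's config directory, consul only sees them after re-reading its configuration. Restarting the agent (as below) works; `consul reload`, which re-reads service and check definitions in place, is a lighter-weight alternative:

consul reload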

# Restart consul so it picks up the new service definitions
[root@ubuntu2004 data]#vim /usr/local/prometheus/prometheus.yml
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nodes"
      refresh_interval: 2m

[root@ubuntu2004 data]#curl -XPOST http://localhost:9090/-/reload
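
Both sides can now be verified: consul should list the services in its catalog, and Prometheus should show them as active targets (jq assumed installed):

# services registered in consul
curl -s localhost:8500/v1/catalog/services
# instances Prometheus discovered through consul
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[].labels.instance'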

Deploying consul_exporter

[root@ubuntu2004 data]#tar -xf consul_exporter-0.8.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 data]#cd /usr/local/
[root@ubuntu2004 local]#
[root@ubuntu2004 local]#ls
bin  consul  consul_exporter-0.8.0.linux-amd64  docker  etc  games  include  lib  man  node_exporter  node_exporter-1.4.0.linux-amd64  prometheus  prometheus-2.40.2.linux-amd64  sbin  share  src
[root@ubuntu2004 local]#ln -sv consul_exporter-0.8.0.linux-amd64/ consul_exporter
'consul_exporter' -> 'consul_exporter-0.8.0.linux-amd64/'
[root@ubuntu2004 local]#ll
total 60
drwxr-xr-x 15 root       root       4096 Nov 24 10:27 ./
drwxr-xr-x 14 root       root       4096 Feb 23  2022 ../
drwxr-xr-x  3 root       root       4096 Nov 24 10:08 bin/
drwxr-xr-x  4 consul     consul     4096 Nov 24 10:06 consul/
lrwxrwxrwx  1 root       root         34 Nov 24 10:27 consul_exporter -> consul_exporter-0.8.0.linux-amd64//
drwxr-xr-x  2       3434       3434 4096 Feb 11  2022 consul_exporter-0.8.0.linux-amd64/
drwxrwxr-x  2 wang       wang       4096 Oct 14 00:50 docker/
drwxr-xr-x  2 root       root       4096 Feb 23  2022 etc/
drwxr-xr-x  2 root       root       4096 Feb 23  2022 games/
drwxr-xr-x  2 root       root       4096 Feb 23  2022 include/
drwxr-xr-x  4 root       root       4096 Nov 22 14:25 lib/
lrwxrwxrwx  1 root       root          9 Feb 23  2022 man -> share/man/
lrwxrwxrwx  1 prometheus prometheus   32 Nov 22 10:25 node_exporter -> node_exporter-1.4.0.linux-amd64//
drwxr-xr-x  2       3434       3434 4096 Sep 26 20:39 node_exporter-1.4.0.linux-amd64/
lrwxrwxrwx  1 prometheus prometheus   29 Nov 22 09:45 prometheus -> prometheus-2.40.2.linux-amd64/
drwxr-xr-x  6 prometheus prometheus 4096 Nov 24 10:01 prometheus-2.40.2.linux-amd64/
drwxr-xr-x  2 root       root       4096 Feb 23  2022 sbin/
drwxr-xr-x  4 root       root       4096 Feb 23  2022 share/
drwxr-xr-x  2 root       root       4096 Feb 23  2022 src/
[root@ubuntu2004 local]#chown -R consul. consul_exporter

# systemd unit file
[root@ubuntu2004 local]#vim /usr/lib/systemd/system/consul_exporter.service
[Unit]
Description=consul_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Type=simple
User=consul
EnvironmentFile=-/etc/default/consul_exporter
# If consul_exporter and the consul server are not on the same host, point --consul.server at the consul server's actual address;
ExecStart=/usr/local/consul_exporter/consul_exporter \
            --consul.server="http://localhost:8500" \
            --web.listen-address=":9107" \
            --web.telemetry-path="/metrics" \
            --log.level=info \
            $ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=multi-user.target

[root@ubuntu2004 local]#systemctl daemon-reload 
[root@ubuntu2004 local]#systemctl start consul_exporter.service 
[root@ubuntu2004 local]#systemctl status consul_exporter.service 
● consul_exporter.service - consul_exporter
     Loaded: loaded (/lib/systemd/system/consul_exporter.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 10:30:22 CST; 10s ago
       Docs: https://prometheus.io/docs/introduction/overview/
   Main PID: 44688 (consul_exporter)
      Tasks: 4 (limit: 2236)
     Memory: 2.0M
     CGroup: /system.slice/consul_exporter.service
             └─44688 /usr/local/consul_exporter/consul_exporter --consul.server=http://localhost:8500 --web.listen-address=:9107 --web.telemetry-path=/metrics --log.level=info

Nov 24 10:30:22 ubuntu2004 systemd[1]: Started consul_exporter.
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.209Z caller=consul_exporter.go:80 level=info msg="Starting consul_exporter" version="(version=0.8.0, branch=HEAD, revision=176aef0f2d437e9fd1cb3a9e29dc4730de717e05)"
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.210Z caller=consul_exporter.go:81 level=info build_context="(go=go1.17.6, user=root@566e953b1722, date=20220210-16:54:21)"
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.210Z caller=consul_exporter.go:132 level=info msg="Listening on address" address=:9107
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.211Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
[root@ubuntu2004 local]#ss -ntl
State                         Recv-Q                        Send-Q                                               Local Address:Port                                                 Peer Address:Port                        Process                        
LISTEN                        0                             4096                                                     127.0.0.1:8300                                                      0.0.0.0:*                                                          
LISTEN                        0                             4096                                                     127.0.0.1:8301                                                      0.0.0.0:*                                                          
LISTEN                        0                             4096                                                     127.0.0.1:8302                                                      0.0.0.0:*                                                          
LISTEN                        0                             4096                                                 127.0.0.53%lo:53                                                        0.0.0.0:*                                                          
LISTEN                        0                             128                                                        0.0.0.0:22                                                        0.0.0.0:*                                                          
LISTEN                        0                             4096                                                             *:9090                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:9100                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:9107                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:8500                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:8502                                                            *:*                                                          
LISTEN                        0                             128                                                           [::]:22                                                           [::]:*                                                          
LISTEN                        0                             4096                                                             *:8503                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:8600                                                            *:*     
[root@ubuntu2004 config]#cat consul_exporter.json 
{
     "id": "consul_exporter",
     "name": "consul_exporter",
     "address": "10.0.0.210",
     "port": 9107,
     "tags": ["consul_exporter"],
     "checks": [{
       "http": "http://10.0.0.210:9107/metrics",
       "interval": "5s"
     }]
}

[root@ubuntu2004 config]#curl -XPUT --data @consul_exporter.json localhost:8500/v1/agent/service/register
[root@ubuntu2004 config]#vim /usr/local/prometheus/prometheus.yml
...
  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "consul_exporter"
      refresh_interval: 1m
[root@ubuntu2004 config]#curl -XPOST http://localhost:9090/-/reload
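
A quick check that the exporter itself can reach consul: consul_up is 1 when its last query of the consul server succeeded (assuming the default :9107 port):

curl -s localhost:9107/metrics | grep '^consul_up'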

Deploying mysqld_exporter

[root@ubuntu2004 local]#tar xf mysqld_exporter-0.14.0.linux-amd64.tar.gz -C /usr/local/

[root@ubuntu2004 local]#ln -sv /usr/local/mysqld_exporter-0.14.0.linux-amd64 /usr/local/mysqld_exporter
'/usr/local/mysqld_exporter' -> '/usr/local/mysqld_exporter-0.14.0.linux-amd64'

[root@ubuntu2004 local]#vim /usr/lib/systemd/system/mysqld_exporter.service
[Unit]
Description=mysqld_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Type=simple
User=mysql
EnvironmentFile=-/etc/default/mysqld_exporter
# If mysqld_exporter and the MySQL server are not on the same host, point the DSN at the MySQL server's actual address;
# mysqld_exporter logs in to MySQL with username and password both set to 'exporter'; that account must be granted the proper privileges;
Environment='DATA_SOURCE_NAME=exporter:exporter@(localhost:3306)'
ExecStart=/usr/local/mysqld_exporter/mysqld_exporter \
            --web.listen-address=":9104" \
            --web.telemetry-path="/metrics" \
            --collect.info_schema.innodb_tablespaces \
            --collect.info_schema.innodb_metrics \
            --collect.global_status \
            --collect.global_variables \
            --collect.slave_status \
            --collect.engine_innodb_status \
            $ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=multi-user.target
                                                                                                                                                                
[root@ubuntu2004 local]#chown -R  mysql. /usr/local/mysqld_exporter

[root@ubuntu2004 local]#mysql
Welcome to the MySQL monitor.  Commands end with ; or \g.
Your MySQL connection id is 9
Server version: 8.0.31-0ubuntu0.20.04.2 (Ubuntu)

Copyright (c) 2000, 2022, Oracle and/or its affiliates.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.

mysql> CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'exporter';
Query OK, 0 rows affected (0.02 sec)

mysql> GRANT PROCESS, REPLICATION CLIENT ON *.* TO 'exporter'@'localhost';
Query OK, 0 rows affected (0.01 sec)

mysql> GRANT SELECT ON performance_schema.* TO 'exporter'@'localhost';
Query OK, 0 rows affected (0.00 sec)

mysql> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.01 sec)

mysql> exit
Bye
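
Before starting the exporter it is worth confirming that the new account can actually log in with the DSN credentials; a quick check (not part of the original session):

mysql -uexporter -pexporter -e 'SHOW GRANTS;'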

[root@ubuntu2004 local]#systemctl daemon-reload 
[root@ubuntu2004 local]#systemctl start mysqld_exporter.service 
[root@ubuntu2004 local]#systemctl status mysqld_exporter.service 
● mysqld_exporter.service - mysqld_exporter
     Loaded: loaded (/lib/systemd/system/mysqld_exporter.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 10:55:13 CST; 4s ago
       Docs: https://prometheus.io/docs/introduction/overview/
   Main PID: 46014 (mysqld_exporter)
      Tasks: 5 (limit: 2236)
     Memory: 1.9M
     CGroup: /system.slice/mysqld_exporter.service
             └─46014 /usr/local/mysqld_exporter/mysqld_exporter --web.listen-address=:9104 --web.telemetry-path=/metrics --collect.info_schema.innodb_tablespaces --collect.info_schema.innodb_metrics --collect.global_status --collect.global_variables ->

Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_tablespaces
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_metrics
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=global_status
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=global_variables
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.868Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_cmp
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.870Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_cmpmem
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.query_response_time
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=engine_innodb_status
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:303 level=info msg="Listening on address" address=:9104
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.872Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
[root@ubuntu2004 local]#ss -ntl
State                         Recv-Q                        Send-Q                                               Local Address:Port                                                 Peer Address:Port                        Process                        
LISTEN                        0                             70                                                       127.0.0.1:33060                                                     0.0.0.0:*                                                          
LISTEN                        0                             151                                                      127.0.0.1:3306                                                      0.0.0.0:*                                                          
LISTEN                        0                             4096                                                 127.0.0.53%lo:53                                                        0.0.0.0:*                                                          
LISTEN                        0                             128                                                        0.0.0.0:22                                                        0.0.0.0:*                                                          
LISTEN                        0                             4096                                                             *:9100                                                            *:*                                                          
LISTEN                        0                             4096                                                             *:9104                                                            *:*                                                          
LISTEN                        0                             128                                                           [::]:22                                                           [::]:*   
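
mysqld_exporter reports its own connectivity: mysql_up is 1 when it can reach the MySQL server with the configured DSN (assuming the default :9104 port):

curl -s localhost:9104/metrics | grep '^mysql_up'
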
# Register it in consul so it is picked up by service discovery
[root@ubuntu2004 config]#vim mysqld_exporter.json 
{
     "id": "mysqld_exporter",
     "name": "mysqld_exporter",
     "address": "10.0.0.209",
     "port": 9104,
     "tags": ["mysqld_exporter"],
     "checks": [{
       "http": "http://10.0.0.209:9104/metrics",
       "interval": "5s"
     }]
}

[root@ubuntu2004 config]#curl -XPUT --data @mysqld_exporter.json localhost:8500/v1/agent/service/register

[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "mysqld_exporter"
      refresh_interval: 1m

[root@ubuntu2004 prometheus]#./promtool check config ./prometheus.yml
Checking ./prometheus.yml
 SUCCESS: ./prometheus.yml is valid prometheus config file syntax

[root@ubuntu2004 prometheus]#curl -XPOST http://localhost:9090/-/reload

Deploying nginx_exporter

# Run nginx and nginx_exporter as containers
[root@ubuntu2004 nginx-and-exporter]#docker-compose up -d
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.13) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
Creating network "nginx-and-exporter_monitoring" with driver "bridge"
Creating nginx-and-exporter_nginx_1 ... done
Creating nginx-and-exporter_nginx-exporter_1 ... done

[root@ubuntu2004 nginx-and-exporter]#tree .
.
├── docker-compose.yml
└── nginx
    └── stub_status-server.conf

[root@ubuntu2004 nginx-and-exporter]#cat docker-compose.yml 
version: '3.6'

networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.107.0/24

services:
  nginx:
    image: nginx:1.22.1
    volumes:
      - ./nginx/stub_status-server.conf:/etc/nginx/conf.d/stub_status-server.conf:ro
    networks:
      - monitoring
    expose:
      - 8080
      - 80
    ports:
      - 80:80

  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:0.11
    command:
      - '-nginx.scrape-uri=http://nginx:8080/stub_status'
    networks:
      - monitoring
    ports:
      - '9113:9113'
    depends_on:
      - nginx
            
[root@ubuntu2004 nginx-and-exporter]#cat nginx/stub_status-server.conf 
server {
    listen 8080;
    server_name localhost;

    location /stub_status {
        stub_status;

        access_log off;
        #allow 172.31.0.0/16;
        #deny all;
    }
}

[root@ubuntu2004 nginx-and-exporter]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                       Peer Address:Port                   Process                                                       
LISTEN                    0                         4096                                               0.0.0.0:80                                              0.0.0.0:*                       users:(("docker-proxy",pid=52611,fd=4))                      
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                              0.0.0.0:*                       users:(("systemd-resolve",pid=734,fd=13))                    
LISTEN                    0                         128                                                0.0.0.0:22                                              0.0.0.0:*                       users:(("sshd",pid=768,fd=3))                                
LISTEN                    0                         4096                                               0.0.0.0:9113                                            0.0.0.0:*                       users:(("docker-proxy",pid=52744,fd=4))                      
LISTEN                    0                         4096                                                     *:9100                                                  *:*                       users:(("node_exporter",pid=43836,fd=7))                     
LISTEN                    0                         4096                                                  [::]:80                                                 [::]:*                       users:(("docker-proxy",pid=52618,fd=4))                      
LISTEN                    0                         128                                                   [::]:22                                                 [::]:*                       users:(("sshd",pid=768,fd=4))                                
LISTEN                    0                         4096                                                  [::]:9113                                               [::]:*                       users:(("docker-proxy",pid=52751,fd=4))                      

[root@ubuntu2004 nginx-and-exporter]#curl 10.0.0.208:9113/metrics
# HELP nginx_connections_accepted Accepted client connections
# TYPE nginx_connections_accepted counter
nginx_connections_accepted 1
# HELP nginx_connections_active Active client connections
# TYPE nginx_connections_active gauge
nginx_connections_active 1
# HELP nginx_connections_handled Handled client connections
# TYPE nginx_connections_handled counter
nginx_connections_handled 1
# HELP nginx_connections_reading Connections where NGINX is reading the request header
# TYPE nginx_connections_reading gauge
nginx_connections_reading 0
# HELP nginx_connections_waiting Idle client connections
# TYPE nginx_connections_waiting gauge
nginx_connections_waiting 0
# HELP nginx_connections_writing Connections where NGINX is writing the response back to the client
# TYPE nginx_connections_writing gauge
nginx_connections_writing 1
# HELP nginx_http_requests_total Total http requests
# TYPE nginx_http_requests_total counter
nginx_http_requests_total 2
# HELP nginx_up Status of the last metric scrape
# TYPE nginx_up gauge
nginx_up 1
# HELP nginxexporter_build_info Exporter build information
# TYPE nginxexporter_build_info gauge
nginxexporter_build_info{arch="linux/amd64",commit="e4a6810d4f0b776f7fde37fea1d84e4c7284b72a",date="2022-09-07T21:09:51Z",dirty="false",go="go1.19",version="0.11.0"} 1
# Service registration and service discovery
[root@ubuntu2004 config]#vim nginx_exporter.json 
{
     "id": "nginx_exporter",
     "name": "nginx_exporter",
     "address": "10.0.0.208",
     "port": 9113,
     "tags": ["nginx_exporter"],
     "checks": [{
       "http": "http://10.0.0.208:9113/metrics",
       "interval": "5s"
     }]
}

[root@ubuntu2004 config]#curl -XPUT --data @nginx_exporter.json localhost:8500/v1/agent/service/register

[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nginx_exporter"
      refresh_interval: 1m

"prometheus/prometheus.yml" 66L, 1725C 已写入                                                                                                                                                                                             
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload

Deploying blackbox-exporter

[root@ubuntu2004 blackbox-exporter]#cat docker-compose.yml 
version: '3.6'

networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.136.0/24

services:
  blackbox_exporter:
    image: prom/blackbox-exporter:v0.22.0
    volumes:
      - ./blackboxexporter/:/etc/blackboxexporter/
    command:
      - '--config.file=/etc/blackboxexporter/config.yml'
    networks:
      - monitoring
    ports:
      - 9115:9115
[root@ubuntu2004 blackbox-exporter]#cat blackboxexporter/config.yml 
modules:
  # https://github.com/prometheus/blackbox_exporter/blob/master/example.yml
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: 
      - "HTTP/1.1"
      - "HTTP/2"
      valid_status_codes: []  # Defaults to 2xx
      enable_http2: false
      method: GET
      no_follow_redirects: false
      # When fail_if_ssl is true, the probe fails if the site uses SSL, and succeeds otherwise;
      # fail_if_not_ssl is the exact opposite;
      fail_if_ssl: false
      fail_if_not_ssl: false
      #  fail_if_body_matches_regexp, fail_if_body_not_matches_regexp, fail_if_header_matches, fail_if_header_not_matches
      #  each take a list of regular expressions to assert that the HTTP response does, or does not, match the given patterns
      fail_if_body_matches_regexp:
        - "Could not connect to database"
      tls_config:
        insecure_skip_verify: false
      preferred_ip_protocol: "ip4" # defaults to "ip6"
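
Before wiring the exporter into Prometheus, a module can be exercised by hand against the /probe endpoint; probe_success should be 1 for a reachable site (assuming the exporter is published on 10.0.0.208:9115 as below):

curl -s 'http://10.0.0.208:9115/probe?module=http_2xx&target=www.baidu.com' | grep '^probe_success'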

[root@ubuntu2004 blackbox-exporter]#docker-compose up -d 
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.13) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
Creating network "blackbox-exporter_monitoring" with driver "bridge"
Pulling blackbox_exporter (prom/blackbox-exporter:v0.22.0)...
v0.22.0: Pulling from prom/blackbox-exporter
19d511225f94: Pull complete
f8b4a0d0d975: Pull complete
73c8559532e0: Pull complete
c1abff7c7d36: Pull complete
Digest: sha256:608acee5704ad49c3308b900230dfc00b25da0c90425f8fed55cf005e07f521b
Status: Downloaded newer image for prom/blackbox-exporter:v0.22.0
Creating blackbox-exporter_blackbox_exporter_1 ... done
[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label job=<job_name> to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nodes"
      refresh_interval: 1m
  
  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "consul_exporter"
      refresh_interval: 1m
  
  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "mysqld_exporter"
      refresh_interval: 1m

  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nginx_exporter"
      refresh_interval: 1m

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]  # Look for a HTTP 200 response.
    static_configs:
    - targets:
      - "www.baidu.com"
      - "www.google.com"
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "10.0.0.208:9115"  # 指向实际的Blackbox exporter.
      - target_label: region
        replacement: "remote"
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload

Persisting query results with recording rules

[root@ubuntu2004 prometheus]#cat rules/record-rules-node.yml 
groups:
- name: custom_rules
  interval: 5s
  rules:
  - record: instance:node_cpu:avg_rate5m
    expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100

  - record: instance:node_memory_MemFree_percent
    expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes

  - record: instance:root:node_filesystem_free_percent
    expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}
[root@ubuntu2004 prometheus]#cat rules/record-rules-mysqld.yml 
groups:
- name: mysqld_rules
  rules:

  # Record slave lag seconds for pre-computed timeseries that takes
  # mysql_slave_status_sql_delay into account
  - record: instance:mysql_slave_lag_seconds
    expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay

  # Record slave lag via heartbeat method
  - record: instance:mysql_heartbeat_lag_seconds
    expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds

  - record: job:mysql_transactions:rate5m
    expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))


[root@ubuntu2004 prometheus]#ls rules/
record-rules-mysqld.yml  record-rules-node.yml

[root@ubuntu2004 prometheus]#vim prometheus.yml
...
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - rules/record-rules-*.yml
...
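
promtool can validate the rule files together with the main config before reloading, and once loaded the recorded series are queryable under their new names, for example:

./promtool check rules rules/record-rules-node.yml rules/record-rules-mysqld.yml
./promtool check config ./prometheus.yml
curl -XPOST http://localhost:9090/-/reload
# the pre-computed series is now available like any other metric
curl -s 'http://localhost:9090/api/v1/query?query=instance:node_cpu:avg_rate5m'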

Deploying Grafana

https://grafana.com/grafana/download
[root@ubuntu2004 ~]#apt-get install -y adduser libfontconfig1
[root@ubuntu2004 ~]#wget https://dl.grafana.com/enterprise/release/grafana-enterprise_9.2.6_amd64.deb
[root@ubuntu2004 ~]#dpkg -i grafana-enterprise_9.2.6_amd64.deb
Selecting previously unselected package grafana-enterprise.
(Reading database ... 114765 files and directories currently installed.)
Preparing to unpack grafana-enterprise_9.2.6_amd64.deb ...
Unpacking grafana-enterprise (9.2.6) ...
Setting up grafana-enterprise (9.2.6) ...
Adding system user `grafana' (UID 113) ...
Adding new user `grafana' (UID 113) to group `grafana' ...
Could not create home directory `/usr/share/grafana'.
### NOT starting on installation, please execute the following statements to configure grafana to start automatically using systemd
 sudo /bin/systemctl daemon-reload
 sudo /bin/systemctl enable grafana-server
### You can start grafana-server by executing
 sudo /bin/systemctl start grafana-server
Processing triggers for systemd (245.4-4ubuntu3.15) ...

[root@ubuntu2004 ~]#systemctl enable --now grafana-server.service 
Synchronizing state of grafana-server.service with SysV service script with /lib/systemd/systemd-sysv-install.
Executing: /lib/systemd/systemd-sysv-install enable grafana-server
Created symlink /etc/systemd/system/multi-user.target.wants/grafana-server.service → /lib/systemd/system/grafana-server.service.

[root@ubuntu2004 ~]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                       Peer Address:Port                   Process                                                       
LISTEN                    0                         4096                                             127.0.0.1:8300                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=6))                            
LISTEN                    0                         4096                                             127.0.0.1:8301                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=9))                            
LISTEN                    0                         4096                                             127.0.0.1:8302                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=7))                            
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                              0.0.0.0:*                       users:(("systemd-resolve",pid=740,fd=13))                    
LISTEN                    0                         128                                                0.0.0.0:22                                              0.0.0.0:*                       users:(("sshd",pid=794,fd=3))                                
LISTEN                    0                         4096                                                     *:9090                                                  *:*                       users:(("prometheus",pid=43386,fd=7))                        
LISTEN                    0                         4096                                                     *:9100                                                  *:*                       users:(("node_exporter",pid=44033,fd=7))                     
LISTEN                    0                         4096                                                     *:9107                                                  *:*                       users:(("consul_exporter",pid=44688,fd=3))                   
LISTEN                    0                         4096                                                     *:8500                                                  *:*                       users:(("consul",pid=43826,fd=17))                           
LISTEN                    0                         4096                                                     *:8502                                                  *:*                       users:(("consul",pid=43826,fd=18))                           
LISTEN                    0                         128                                                   [::]:22                                                 [::]:*                       users:(("sshd",pid=794,fd=4))                                
LISTEN                    0                         4096                                                     *:8503                                                  *:*                       users:(("consul",pid=43826,fd=19))                           
LISTEN                    0                         4096                                                     *:3000                                                  *:*                       users:(("grafana-server",pid=70188,fd=8))                    
LISTEN                    0                         4096                                                     *:8600                                                  *:*                       users:(("consul",pid=43826,fd=16))                           

[root@ubuntu2004 config]#vim grafana.json 
{
     "id": "grafana",
     "name": "grafana",
     "address": "10.0.0.210",
     "port": 3000,
     "tags": ["grafana"],
     "checks": [{
       "http": "http://10.0.0.210:3000/metrics",
       "interval": "5s"
     }]
}

[root@ubuntu2004 config]#curl -XPUT --data @grafana.json http://localhost:8500/v1/agent/service/register
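
To confirm Consul accepted the registration, the agent's service list can be read back (output omitted):

[root@ubuntu2004 config]#curl -s http://localhost:8500/v1/agent/services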

[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "grafana"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "grafana"
      refresh_interval: 1m


"prometheus/prometheus.yml" 98L, 2540C 已写入                                                                                                                                                                                             
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload

Deploy Alertmanager to implement alerting

[root@ubuntu2004 local]#curl -LO https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 24.6M  100 24.6M    0     0   991k      0  0:00:25  0:00:25 --:--:--  688k
[root@ubuntu2004 local]#tar xf alertmanager-0.24.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 local]#ln -sv /usr/local/alertmanager-0.24.0.linux-amd64 /usr/local/alertmanager
'/usr/local/alertmanager' -> '/usr/local/alertmanager-0.24.0.linux-amd64'
[root@ubuntu2004 local]#mkdir /usr/local/alertmanager/data
[root@ubuntu2004 local]#chown -R prometheus.prometheus /usr/local/alertmanager/data

[root@ubuntu2004 local]#vim /usr/lib/systemd/system/alertmanager.service
[Unit]
Description=alertmanager
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
Type=simple
User=prometheus
ExecStart=/usr/local/alertmanager/alertmanager \
            --config.file="/usr/local/alertmanager/alertmanager.yml" \
            --storage.path="/usr/local/alertmanager/data/" \
            --data.retention=120h \
            --log.level=info
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always

[Install]
WantedBy=multi-user.target
                                                                                                                                                                      
[root@ubuntu2004 local]#systemctl daemon-reload 
[root@ubuntu2004 local]#systemctl start alertmanager.service 
[root@ubuntu2004 local]#systemctl status alertmanager.service 
● alertmanager.service - alertmanager
     Loaded: loaded (/lib/systemd/system/alertmanager.service; disabled; vendor preset: enabled)
     Active: active (running) since Thu 2022-11-24 14:41:18 CST; 4s ago
       Docs: https://prometheus.io/docs/introduction/overview/
   Main PID: 53363 (alertmanager)
      Tasks: 8 (limit: 2236)
     Memory: 13.2M
     CGroup: /system.slice/alertmanager.service
             └─53363 /usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml --storage.path=/usr/local/alertmanager/data/ --data.retention=120h --log.level=info

Nov 24 14:41:18 ubuntu2004 systemd[1]: Started alertmanager.
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.169Z caller=main.go:231 level=info msg="Starting Alertmanager" version="(version=0.24.0, branch=HEAD, revision=f484b17fa3c583ed1b2c8bbcec20ba1db2aa5f11)"
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.169Z caller=main.go:232 level=info build_context="(go=go1.17.8, user=root@265f14f5c6fc, date=20220325-09:31:33)"
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.181Z caller=cluster.go:185 level=info component=cluster msg="setting advertise address explicitly" addr=10.0.0.210 port=9094
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.182Z caller=cluster.go:680 level=info component=cluster msg="Waiting for gossip to settle..." interval=2s
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.216Z caller=coordinator.go:113 level=info component=configuration msg="Loading configuration file" file=/usr/local/alertmanager/alertmanager.yml
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.216Z caller=coordinator.go:126 level=info component=configuration msg="Completed loading of configuration file" file=/usr/local/alertmanager/alertmanager.yml
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.218Z caller=main.go:535 level=info msg=Listening address=:9093
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.218Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
Nov 24 14:41:20 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:20.184Z caller=cluster.go:705 level=info component=cluster msg="gossip not settled" polls=0 before=0 now=1 elapsed=2.000892394s
[root@ubuntu2004 local]#ss -ntlp
State                     Recv-Q                    Send-Q                                       Local Address:Port                                       Peer Address:Port                   Process                                                       
LISTEN                    0                         4096                                             127.0.0.1:8300                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=6))                            
LISTEN                    0                         4096                                             127.0.0.1:8301                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=9))                            
LISTEN                    0                         4096                                             127.0.0.1:8302                                            0.0.0.0:*                       users:(("consul",pid=43826,fd=7))                            
LISTEN                    0                         4096                                         127.0.0.53%lo:53                                              0.0.0.0:*                       users:(("systemd-resolve",pid=740,fd=13))                    
LISTEN                    0                         128                                                0.0.0.0:22                                              0.0.0.0:*                       users:(("sshd",pid=794,fd=3))                                
LISTEN                    0                         4096                                                     *:9090                                                  *:*                       users:(("prometheus",pid=43386,fd=7))                        
LISTEN                    0                         4096                                                     *:9093                                                  *:*                       users:(("alertmanager",pid=53363,fd=8))                      
LISTEN                    0                         4096                                                     *:9094                                                  *:*                       users:(("alertmanager",pid=53363,fd=3))                      
LISTEN                    0                         4096                                                     *:9100                                                  *:*                       users:(("node_exporter",pid=44033,fd=7))                     
LISTEN                    0                         4096                                                     *:9107                                                  *:*                       users:(("consul_exporter",pid=44688,fd=3))                   
LISTEN                    0                         4096                                                     *:8500                                                  *:*                       users:(("consul",pid=43826,fd=17))                           
LISTEN                    0                         4096                                                     *:8502                                                  *:*                       users:(("consul",pid=43826,fd=18))                           
LISTEN                    0                         128                                                   [::]:22                                                 [::]:*                       users:(("sshd",pid=794,fd=4))                                
LISTEN                    0                         4096                                                     *:8503                                                  *:*                       users:(("consul",pid=43826,fd=19))                           
LISTEN                    0                         4096                                                     *:8600                                                  *:*                       users:(("consul",pid=43826,fd=16)) 

Email or WeChat Work alerts

[root@ubuntu2004 alertmanager]#cat alertmanager.yml 
global:
    resolve_timeout: 1m
    smtp_smarthost: 'smtp.qq.com:465'
    smtp_from: '985347841@qq.com'
    smtp_auth_username: '985347841@qq.com'
    smtp_auth_password: 'iovshgiwohaambdfg'
    smtp_hello: '@qq.com'
    smtp_require_tls: false

route:
    group_by: ['group', 'job', 'alertname'] 
    group_wait: 10s
    group_interval: 10s
    repeat_interval: 10m

    receiver: email
    #receiver: wechat

templates:
  - '/etc/alertmanager/email_template.tmpl'

 # Define the receivers
receivers:
- name: 'email'
  email_configs:
    - to: '2508208842@qq.com'
      headers:
        subject: "{{ .Status | toUpper }} {{ .CommonLabels.env }}:{{ .CommonLabels.cluster }} {{ .CommonLabels.alertname }}"
      html: '{{ template "email.to.html" . }}'
      send_resolved: true 

#- name: 'wechat'
#  wechat_configs:
#    - corp_id: ww4c893118fbf4d07c 
#      to_user: '@all'
#      agent_id: 1000008
#      api_secret: WTepmmaqxbBOeTQOuxa0Olzov_hSEWsZWrPX1k6opMk
#      send_resolved: true

inhibit_rules: 
  - source_match: 
     severity: 'critical' 
    target_match: 
     severity: 'warning' 
    equal: ['alertname', 'instance']
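
The tarball also ships amtool, which validates this file the way promtool validates prometheus.yml; a throwaway alert pushed through the v2 API then exercises the email route end to end. A sketch, with placeholder label values:

[root@ubuntu2004 alertmanager]#./amtool check-config alertmanager.yml
[root@ubuntu2004 alertmanager]#curl -XPOST http://localhost:9093/api/v2/alerts \
  -H 'Content-Type: application/json' \
  -d '[{"labels":{"alertname":"TestAlert","severity":"warning","instance":"10.0.0.210"},"annotations":{"summary":"manual test alert"}}]'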

[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.0.0.210:9093
          # - alertmanager:9093
  
[root@ubuntu2004 local]#cat prometheus/rules/alert-rules-blackbox-exporter.yml 
groups:
- name: blackbox
  rules:
  
  # Blackbox probe failed
  - alert: BlackboxProbeFailed
    expr: probe_success == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox probe failed (instance {{ $labels.instance }})
      description: "Probe failed\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

  # Blackbox slow probe
  - alert: BlackboxSlowProbe
    expr: avg_over_time(probe_duration_seconds[1m]) > 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox slow probe (instance {{ $labels.instance }})
      description: "Blackbox probe took more than 1s to complete\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

  # Blackbox probe HTTP failure
  - alert: BlackboxProbeHttpFailure
    expr: probe_http_status_code <= 199 or probe_http_status_code >= 400
    for: 0m
    labels:
      severity: critical
    annotations:
      summary: Blackbox probe HTTP failure (instance {{ $labels.instance }})
      description: "HTTP status code is not 200-399\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

  # Blackbox probe slow HTTP
  - alert: BlackboxProbeSlowHttp
    expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
      description: "HTTP request took more than 1s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

  # Blackbox probe slow ping
  - alert: BlackboxProbeSlowPing
    expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: Blackbox probe slow ping (instance {{ $labels.instance }})
      description: "Blackbox ping took more than 1s\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"



# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - rules/record-rules-*.yml
  - rules/alert-rules-*.yml

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label job=<job_name> to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nodes"
      refresh_interval: 1m
  
  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "consul_exporter"
      refresh_interval: 1m
  
  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "mysqld_exporter"
      refresh_interval: 1m

  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nginx_exporter"
      refresh_interval: 1m

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]  # Look for a HTTP 200 response.
    static_configs:
    - targets:
      - "www.baidu.com"
      - "www.google.com"
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "10.0.0.208:9115"  # 指向实际的Blackbox exporter.
      - target_label: region
        replacement: "remote"

DingTalk alerts

[root@ubuntu2004 alertmanager-and-dingtalk]#tree .
.
├── alertmanager
│   ├── config.yml
│   ├── dingtalk_template.tmpl
│   ├── email_template.tmpl
│   ├── wechat_template_02.tmpl
│   └── wechat_template.tmpl
├── dingtalk
│   ├── config-no-template.yml
│   ├── config-use-customed-template.yml
│   ├── config-use-default-template.yml
│   ├── config.yml
│   └── dingtalk_template.tmpl
└── docker-compose.yml


[root@ubuntu2004 alertmanager-and-dingtalk]#cat docker-compose.yml 
version: '3.6'
networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.66.0/24

services:
  alertmanager:
    image: prom/alertmanager:v0.24.0
    volumes:
      - ./alertmanager/:/etc/alertmanager/
    networks:
      - monitoring
    ports:
      - 9093:9093
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
      - '--log.level=debug'

  prometheus-webhook-dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:v2.1.0
    hostname: dingtalk.magedu.com
    volumes:
      - ./dingtalk/:/etc/prometheus-webhook-dingtalk/
    #command:
      #- --config.file=config.yml
      #- --config.file=/etc/prometheus-webhook-dingtalk/config-with-template.yml
    networks:
      - monitoring
    ports:
      - 8060:8060

[root@ubuntu2004 alertmanager-and-dingtalk]#cat dingtalk/config.yml 
## Request timeout
# timeout: 5s

## Customizable templates path
templates:
  - /etc/prometheus-webhook-dingtalk/dingtalk_template.tmpl

## You can also override default template using default_message
## The following example to use the 'legacy' template from v0.3.0
default_message:
  title: '{{ template "legacy.title" . }}'
  text: '{{ template "dingtalk.default.message" . }}'

## Targets, previously known as "profiles"
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=70b3270e3b6968780cec96ee588a9545dc9672981b55574d4f82516a5b474a8c 
    # secret for signature
    secret: SEC44dfe7e1ffc74a797a871f686e0b3cb211a0438a829dc98f47a770dad7db5646
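
The alertmanager/config.yml of this project is not dumped here; to route alerts through the webhook it needs a webhook receiver pointing at the dingtalk container. A sketch, assuming the compose service name prometheus-webhook-dingtalk and the webhook1 target defined above (fragment only, the rest of the routing tree stays as before):

route:
  receiver: dingtalk

receivers:
- name: 'dingtalk'
  webhook_configs:
    - url: 'http://prometheus-webhook-dingtalk:8060/dingtalk/webhook1/send'
      send_resolved: true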


Deploy a VictoriaMetrics cluster for Prometheus remote persistent storage

[root@ubuntu2004 victoriametrics-cluster]#cat README.md 
# VictoriaMetrics example cluster

#### Configure Prometheus to use it as remote storage:
remote_write:    # remote-write to the remote VM storage
  - url: http://vminsert.magedu.com:8480/insert/0/prometheus

remote_read:
  - url: http://vmselect.magedu.com:8481/select/0/prometheus
The 0 in the URLs is the tenant ID under the multi-tenant model.

#### Configure Grafana to use it as a data source:
Add a new data source and set its data load path to: http://vmselect.magedu.com:8481/select/0/prometheus
Once configured, import a dashboard to view data from this source in Grafana.

[root@ubuntu2004 victoriametrics-cluster]#cat docker-compose.yml
version: '3.6'

networks:
  vm_net:
    driver: bridge

volumes:
  strgdata-1: {}
  strgdata-2: {}
  grafanadata: {}

services:
  vmstorage-1:
    container_name: vmstorage-1
    image: victoriametrics/vmstorage:v1.83.1-cluster
    ports:
      - 8482
      - 8400
      - 8401
    volumes:
      - strgdata-1:/storage
    networks:
      - vm_net
    command:
      - '--storageDataPath=/storage'
    restart: always

  vmstorage-2:
    container_name: vmstorage-2
    image: victoriametrics/vmstorage:v1.83.1-cluster
    networks:
      - vm_net
    ports:
      - 8482
      - 8400
      - 8401
    volumes:
      - strgdata-2:/storage
    command:
      - '--storageDataPath=/storage'
    restart: always

  vminsert:
    container_name: vminsert
    image: victoriametrics/vminsert:v1.83.1-cluster
    depends_on:
      - "vmstorage-1"
      - "vmstorage-2"
    command:
      - '--storageNode=vmstorage-1:8400'
      - '--storageNode=vmstorage-2:8400'
    ports:
      - 8480:8480
    networks:
      - vm_net
    restart: always

  vmselect:
    container_name: vmselect
    image: victoriametrics/vmselect:v1.83.1-cluster
    depends_on:
      - "vmstorage-1"
      - "vmstorage-2"
    command:
      - '--storageNode=vmstorage-1:8401'
      - '--storageNode=vmstorage-2:8401'
      #- '--vmalert.proxyURL=http://vmalert:8880'
    networks:
      - vm_net
    ports:
      - 8481:8481
    restart: always

[root@ubuntu2004 victoriametrics-cluster]#docker-compose up -d
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.13) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
Creating network "victoriametrics-cluster_vm_net" with driver "bridge"
Creating volume "victoriametrics-cluster_strgdata-1" with default driver
Creating volume "victoriametrics-cluster_strgdata-2" with default driver
Creating volume "victoriametrics-cluster_grafanadata" with default driver
Pulling vmstorage-1 (victoriametrics/vmstorage:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vmstorage
213ec9aee27d: Pull complete
6a19377ddd0d: Pull complete
ea5876b4cb5f: Pull complete
Digest: sha256:9f24059dc431c210f6279b6c997ed4fe6cceb4537a446c36b9af1b8506dd2bf0
Status: Downloaded newer image for victoriametrics/vmstorage:v1.83.1-cluster
Pulling vminsert (victoriametrics/vminsert:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vminsert
213ec9aee27d: Already exists
6a19377ddd0d: Already exists
1da207c2dd42: Pull complete
Digest: sha256:9bb864163d7dcfbf781d6f82d19e322b5a660c695ecbd146d85592c57e60d9bf
Status: Downloaded newer image for victoriametrics/vminsert:v1.83.1-cluster
Pulling vmselect (victoriametrics/vmselect:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vmselect
213ec9aee27d: Already exists
6a19377ddd0d: Already exists
3f737ea0184e: Pull complete
Digest: sha256:4fad7d731ee727fce4779523b0f1feb66ed55fe414744e4ce856a93f11175870
Status: Downloaded newer image for victoriametrics/vmselect:v1.83.1-cluster
Creating vmstorage-2 ... done
Creating vmstorage-1 ... done
Creating vminsert    ... done
Creating vmselect    ... done

[root@ubuntu2004 victoriametrics-cluster]#ss -ntlp
State   Recv-Q  Send-Q   Local Address:Port     Peer Address:Port   Process
LISTEN  0       4096           0.0.0.0:80            0.0.0.0:*      users:(("docker-proxy",pid=52611,fd=4))
LISTEN  0       4096     127.0.0.53%lo:53            0.0.0.0:*      users:(("systemd-resolve",pid=734,fd=13))
LISTEN  0       128            0.0.0.0:22            0.0.0.0:*      users:(("sshd",pid=768,fd=3))
LISTEN  0       4096           0.0.0.0:9113          0.0.0.0:*      users:(("docker-proxy",pid=52744,fd=4))
LISTEN  0       4096           0.0.0.0:9115          0.0.0.0:*      users:(("docker-proxy",pid=55562,fd=4))
LISTEN  0       4096           0.0.0.0:8480          0.0.0.0:*      users:(("docker-proxy",pid=75573,fd=4))
LISTEN  0       4096           0.0.0.0:8481          0.0.0.0:*      users:(("docker-proxy",pid=75612,fd=4))
LISTEN  0       4096           0.0.0.0:49153         0.0.0.0:*      users:(("docker-proxy",pid=75250,fd=4))
LISTEN  0       4096           0.0.0.0:49154         0.0.0.0:*      users:(("docker-proxy",pid=75271,fd=4))
LISTEN  0       4096           0.0.0.0:49155         0.0.0.0:*      users:(("docker-proxy",pid=75291,fd=4))
LISTEN  0       4096           0.0.0.0:49156         0.0.0.0:*      users:(("docker-proxy",pid=75313,fd=4))
LISTEN  0       4096           0.0.0.0:49157         0.0.0.0:*      users:(("docker-proxy",pid=75333,fd=4))
LISTEN  0       4096           0.0.0.0:49158         0.0.0.0:*      users:(("docker-proxy",pid=75354,fd=4))
LISTEN  0       4096                 *:9100                *:*      users:(("node_exporter",pid=43836,fd=7))
LISTEN  0       4096              [::]:80               [::]:*      users:(("docker-proxy",pid=52618,fd=4))
LISTEN  0       128               [::]:22               [::]:*      users:(("sshd",pid=768,fd=4))
LISTEN  0       4096              [::]:9113             [::]:*      users:(("docker-proxy",pid=52751,fd=4))
LISTEN  0       4096              [::]:9115             [::]:*      users:(("docker-proxy",pid=55569,fd=4))
LISTEN  0       4096              [::]:8480             [::]:*      users:(("docker-proxy",pid=75579,fd=4))
LISTEN  0       4096              [::]:8481             [::]:*      users:(("docker-proxy",pid=75622,fd=4))
LISTEN  0       4096              [::]:49153            [::]:*      users:(("docker-proxy",pid=75257,fd=4))
LISTEN  0       4096              [::]:49154            [::]:*      users:(("docker-proxy",pid=75279,fd=4))
LISTEN  0       4096              [::]:49155            [::]:*      users:(("docker-proxy",pid=75298,fd=4))
LISTEN  0       4096              [::]:49156            [::]:*      users:(("docker-proxy",pid=75319,fd=4))
LISTEN  0       4096              [::]:49157            [::]:*      users:(("docker-proxy",pid=75341,fd=4))
LISTEN  0       4096              [::]:49158            [::]:*      users:(("docker-proxy",pid=75367,fd=4))

[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
remote_write:
  - url: http://10.0.0.208:8480/insert/0/prometheus

remote_read:
  - url: http://10.0.0.208:8481/select/0/prometheus
...
"prometheus/prometheus.yml" 107L, 2781C written
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
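
Whether samples actually land in VictoriaMetrics can be verified by querying vmselect directly; it exposes a Prometheus-compatible API under the tenant prefix (output omitted):

[root@ubuntu2004 local]#curl -s 'http://10.0.0.208:8481/select/0/prometheus/api/v1/query?query=up'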

Configure the new data source in Grafana

Monitor vmselect and vminsert

[root@ubuntu2004 config]#cat vm*
{
     "id": "vminsert",
     "name": "vminsert",
     "address": "10.0.0.208",
     "port": 8480,
     "tags": ["vminsert"],
     "checks": [{
       "http": "http://10.0.0.208:8480/metrics",
       "interval": "5s"
     }]
}

{
     "id": "vmselect",
     "name": "vmselect",
     "address": "10.0.0.208",
     "port": 8481,
     "tags": ["vmselect"],
     "checks": [{
       "http": "http://10.0.0.208:8481/metrics",
       "interval": "5s"
     }]
}

[root@ubuntu2004 config]#curl -XPUT --data @vminsert.json http://localhost:8500/v1/agent/service/register
[root@ubuntu2004 config]#curl -XPUT --data @vmselect.json http://localhost:8500/v1/agent/service/register
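
As with the earlier registrations, Consul's catalog can be read back to confirm both services are known (output omitted):

[root@ubuntu2004 config]#curl -s http://localhost:8500/v1/catalog/service/vminsert
[root@ubuntu2004 config]#curl -s http://localhost:8500/v1/catalog/service/vmselect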

[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.0.0.210:9093
          # - alertmanager:9093
  

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - rules/record-rules-*.yml
  - rules/alert-rules-*.yml

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label job=<job_name> to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nodes"
      refresh_interval: 1m
    metric_relabel_configs:
    - source_labels:
      - __name__
      regex: "go_info.*"
      action: drop
  
  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "consul_exporter"
      refresh_interval: 1m
  
  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "mysqld_exporter"
      refresh_interval: 1m

  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "nginx_exporter"
      refresh_interval: 1m

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]  # Look for a HTTP 200 response.
    static_configs:
    - targets:
      - "www.baidu.com"
      - "www.google.com"
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "10.0.0.208:9115"  # 指向实际的Blackbox exporter.
      - target_label: region
        replacement: "remote"
    
  - job_name: "grafana"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "grafana"
      refresh_interval: 1m

  - job_name: "vminsert"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "vminsert"
      refresh_interval: 1m


  - job_name: "vmselect"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
    - server: "localhost:8500"
      tags:
      - "vmselect"
      refresh_interval: 1m


remote_write:
  - url: http://10.0.0.208:8480/insert/0/prometheus

remote_read:
  - url: http://10.0.0.208:8481/select/0/prometheus

Run Prometheus with docker-compose

[root@ubuntu2004 01-prometheus-basics-example]#docker-compose up -d
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
Starting 01-prometheus-basics-example_node-exporter_1 ... done
Starting 01-prometheus-basics-example_prometheus_1    ... done


[root@ubuntu2004 01-prometheus-basics-example]#docker-compose ps
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
                    Name                                  Command               State                    Ports                  
--------------------------------------------------------------------------------------------------------------------------------
01-prometheus-basics-example_node-exporter_1   /bin/node_exporter --path. ...   Up      0.0.0.0:9100->9100/tcp,:::9100->9100/tcp
01-prometheus-basics-example_prometheus_1      /bin/prometheus --config.f ...   Up      0.0.0.0:9090->9090/tcp,:::9090->9090/tcp


[root@ubuntu2004 01-prometheus-basics-example]#tree .
.
├── docker-compose.yml
├── prometheus
│   ├── prometheus.yml
│   └── targets
│       ├── nodes-linux.yaml
│       └── prometheus-servers.yaml
└── README.md
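
The prometheus/prometheus.yml of this example is not shown; given the targets/ directory it presumably relies on file-based service discovery, roughly along these lines (job names and target entries are assumptions inferred from the file names):

scrape_configs:
  - job_name: 'prometheus-servers'
    file_sd_configs:
      - files:
          - 'targets/prometheus-servers.yaml'
        refresh_interval: 1m

  - job_name: 'nodes-linux'
    file_sd_configs:
      - files:
          - 'targets/nodes-linux.yaml'
        refresh_interval: 1m

A targets file in that directory would then look like:

# targets/nodes-linux.yaml (assumed content)
- targets:
    - 'server01.magedu.com:9100'
    - 'server02.magedu.com:9100'
  labels:
    os: linux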

[root@ubuntu2004 01-prometheus-basics-example]#vim docker-compose.yml 
version: '3.6'
  
volumes:
    prometheus_data: {}

networks:
  monitoring:
    driver: bridge

services:

  prometheus:
    image: prom/prometheus:v2.40.2
    volumes:
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.enable-lifecycle'
    networks:
      - monitoring
    extra_hosts:
      - "server01.magedu.com:${SERVER01_HOST_IP}"
      - "server02.magedu.com:${SERVER02_HOST_IP}"
      - "server03.magedu.com:${SERVER03_HOST_IP}"
    ports:
      - 9090:9090
    restart: always

  node-exporter:
    image: prom/node-exporter:v1.4.0
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
      - '--path.rootfs=/rootfs'
    ports:
      - 9100:9100
    networks:
      - monitoring
    restart: always
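
The extra_hosts entries read SERVER0x_HOST_IP from the environment; docker-compose resolves these from a .env file next to docker-compose.yml. A sketch with assumed addresses:

# .env (the values below are assumptions; point them at the real hosts)
SERVER01_HOST_IP=10.0.0.208
SERVER02_HOST_IP=10.0.0.209
SERVER03_HOST_IP=10.0.0.210
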
[root@ubuntu2004 02-prometheus-sd-consul-example]#docker-compose up -d
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
Creating network "02-prometheus-sd-consul-example_monitoring" with driver "bridge"
Creating volume "02-prometheus-sd-consul-example_prometheus_data" with default driver
Creating volume "02-prometheus-sd-consul-example_grafana_data" with default driver
Pulling consul (consul:1.14)...
1.14: Pulling from library/consul
9621f1afde84: Pull complete
2c3a98fc12ee: Pull complete
ec9c6a4f2410: Pull complete
b15a7bbb699e: Pull complete
c1ba7dc4df33: Pull complete
a0da3713d685: Pull complete
Digest: sha256:192f202e8120d80e864b6e42af1627297dd8b88f42cf148e02a5c6185d717190
Status: Downloaded newer image for consul:1.14
Creating 02-prometheus-sd-consul-example_consul_1        ... done
Creating 02-prometheus-sd-consul-example_node-exporter_1 ... done
Creating 02-prometheus-sd-consul-example_prometheus_1    ... done
[root@ubuntu2004 02-prometheus-sd-consul-example]#docker-compose ps
/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!
  warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
                     Name                                    Command               State                                                       Ports                                                     
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
02-prometheus-sd-consul-example_consul_1          docker-entrypoint.sh consu ...   Up      8300/tcp, 8301/tcp, 8301/udp, 8302/tcp, 8302/udp, 0.0.0.0:8500->8500/tcp,:::8500->8500/tcp, 8600/tcp, 8600/udp
02-prometheus-sd-consul-example_node-exporter_1   /bin/node_exporter --path. ...   Up      0.0.0.0:9100->9100/tcp,:::9100->9100/tcp                                                                      
02-prometheus-sd-consul-example_prometheus_1      /bin/prometheus --config.f ...   Up      0.0.0.0:9090->9090/tcp,:::9090->9090/tcp          


[root@ubuntu2004 02-prometheus-sd-consul-example]#cat docker-compose.yml 
# Author: MageEdu <mage@magedu.com>
#
version: '3.6'

volumes:
    prometheus_data: {}
    grafana_data: {}

networks:
  monitoring:
    driver: bridge

services:

  prometheus:
    image: prom/prometheus:v2.40.2
    volumes:
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      - '--web.enable-lifecycle'
    networks:
      - monitoring
    ports:
      - 9090:9090
    extra_hosts:
      - "server01.magedu.com:${SERVER01_HOST_IP}"
      - "server02.magedu.com:${SERVER02_HOST_IP}"
      - "server03.magedu.com:${SERVER03_HOST_IP}"
    depends_on:
      - consul
    restart: always

  consul:
    image: consul:1.14
    volumes:
      - ./consul_configs:/consul/config
    networks:
      - monitoring
    ports:
      - 8500:8500
    command: ["consul","agent","-dev","-bootstrap","-config-dir","/consul/config","-data-dir","/consul/data","-ui","-log-level","INFO","-bind","127.0.0.1","-client","0.0.0.0"]

  node-exporter:
    image: prom/node-exporter:v1.4.0
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
      - '--path.rootfs=/rootfs'
    ports:
      - 9100:9100
    networks:
      - monitoring
    restart: always


[root@ubuntu2004 02-prometheus-sd-consul-example]#tree .
.
├── consul_configs
│   ├── nodes.json
│   └── prometheus-servers.json
├── docker-compose.yml
├── prometheus
│   └── prometheus.yml
└── README.MD
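
The consul_configs files themselves are not dumped here. Services registered through Consul's config directory (rather than the HTTP API used earlier) are wrapped in a top-level "service" key; a sketch of what nodes.json plausibly contains, with the name, tag, and port taken from the compose file and the earlier node_exporter job:

{
  "service": {
    "id": "node_exporter",
    "name": "node_exporter",
    "address": "node-exporter",
    "port": 9100,
    "tags": ["nodes"],
    "checks": [{
      "http": "http://node-exporter:9100/metrics",
      "interval": "5s"
    }]
  }
}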