Deploying Prometheus on a single node
[root@ubuntu2004 data]#ls
learning-prometheus node_exporter-1.4.0.linux-amd64.tar.gz prometheus-2.40.2.linux-amd64.tar.gz
[root@ubuntu2004 data]#tar xf prometheus-2.40.2.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 local]#ln -sv prometheus-2.40.2.linux-amd64 prometheus
'prometheus' -> 'prometheus-2.40.2.linux-amd64'
[root@ubuntu2004 local]#ll
total 44
drwxr-xr-x 11 root root 4096 Nov 22 09:45 ./
drwxr-xr-x 14 root root 4096 Feb 23 2022 ../
drwxr-xr-x 2 root root 4096 Feb 23 2022 bin/
drwxr-xr-x 2 root root 4096 Feb 23 2022 etc/
drwxr-xr-x 2 root root 4096 Feb 23 2022 games/
drwxr-xr-x 2 root root 4096 Feb 23 2022 include/
drwxr-xr-x 3 root root 4096 Feb 23 2022 lib/
lrwxrwxrwx 1 root root 9 Feb 23 2022 man -> share/man/
lrwxrwxrwx 1 root root 29 Nov 22 09:45 prometheus -> prometheus-2.40.2.linux-amd64/
drwxr-xr-x 4 1001 121 4096 Nov 17 22:06 prometheus-2.40.2.linux-amd64/
drwxr-xr-x 2 root root 4096 Feb 23 2022 sbin/
drwxr-xr-x 4 root root 4096 Feb 23 2022 share/
drwxr-xr-x 2 root root 4096 Feb 23 2022 src/
[root@ubuntu2004 prometheus]#useradd -s /sbin/nologin prometheus
[root@ubuntu2004 prometheus]#ls
console_libraries consoles LICENSE NOTICE prometheus prometheus.service prometheus.yml prometheus.yml.bak promtool
[root@ubuntu2004 prometheus]#./prometheus
ts=2022-11-22T01:57:04.650Z caller=main.go:512 level=info msg="No time or size retention was set so using the default time retention" duration=15d
ts=2022-11-22T01:57:04.651Z caller=main.go:556 level=info msg="Starting Prometheus Server" mode=server version="(version=2.40.2, branch=HEAD, revision=a07a94a5abb8a979d8aa87297f77f3979148b2da)"
ts=2022-11-22T01:57:04.652Z caller=main.go:561 level=info build_context="(go=go1.19.3, user=root@1b4b53e3f125, date=20221117-13:40:12)"
ts=2022-11-22T01:57:04.653Z caller=main.go:562 level=info host_details="(Linux 5.4.0-124-generic #140-Ubuntu SMP Thu Aug 4 02:23:37 UTC 2022 x86_64 ubuntu2004 (none))"
ts=2022-11-22T01:57:04.653Z caller=main.go:563 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-11-22T01:57:04.654Z caller=main.go:564 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-11-22T01:57:04.657Z caller=web.go:559 level=info component=web msg="Start listening for connections" address=0.0.0.0:9090
ts=2022-11-22T01:57:04.659Z caller=main.go:993 level=info msg="Starting TSDB ..."
ts=2022-11-22T01:57:04.660Z caller=tls_config.go:232 level=info component=web msg="Listening on" address=[::]:9090
ts=2022-11-22T01:57:04.660Z caller=tls_config.go:235 level=info component=web msg="TLS is disabled." http2=false address=[::]:9090
ts=2022-11-22T01:57:04.662Z caller=head.go:562 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-11-22T01:57:04.662Z caller=head.go:606 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=2.586µs
ts=2022-11-22T01:57:04.662Z caller=head.go:612 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-11-22T01:57:04.663Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=0
ts=2022-11-22T01:57:04.663Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=14.855µs wal_replay_duration=1.386087ms wbl_replay_duration=258ns total_replay_duration=1.426833ms
ts=2022-11-22T01:57:04.665Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
ts=2022-11-22T01:57:04.665Z caller=main.go:1017 level=info msg="TSDB started"
ts=2022-11-22T01:57:04.666Z caller=main.go:1197 level=info msg="Loading configuration file" filename=prometheus.yml
ts=2022-11-22T01:57:04.666Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=prometheus.yml totalDuration=503.234µs db_storage=899ns remote_storage=1.304µs web_handler=349ns query_engine=533ns scrape=161.395µs scrape_sd=22.788µs notify=18.815µs notify_sd=4.213µs rules=1.097µs tracing=4.86µs
ts=2022-11-22T01:57:04.666Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
ts=2022-11-22T01:57:04.666Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
....
[root@ubuntu2004 prometheus]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=736,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=770,fd=3))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=770,fd=4))
LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=2640,fd=7))
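Optional sanity check (an extra verification step beyond the transcript): the HTTP API reports the build info and should show version 2.40.2:
curl -s http://localhost:9090/api/v1/status/buildinfo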
#service file, so the daemon can be managed with systemctl
[root@ubuntu2004 prometheus]#cat prometheus.service
[Unit]
Description=Monitoring system and time series database
Documentation=https://prometheus.io/docs/introduction/overview/
[Service]
Restart=always
User=prometheus
EnvironmentFile=-/etc/default/prometheus
ExecStart=/usr/local/prometheus/prometheus \
--config.file=/usr/local/prometheus/prometheus.yml \
--storage.tsdb.path=/usr/local/prometheus/data \
--web.console.libraries=/usr/share/prometheus/console_libraries \
--web.enable-lifecycle \
$ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
SendSIGKILL=no
LimitNOFILE=8192
[Install]
WantedBy=multi-user.target
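Before systemctl can manage it, the unit file above must be placed in systemd's unit directory and the install tree made writable by the prometheus user; a minimal sketch of those steps (paths as used above, inferred from the status output below):
cp /usr/local/prometheus/prometheus.service /usr/lib/systemd/system/prometheus.service
chown -R prometheus.prometheus /usr/local/prometheus/
systemctl daemon-reload
systemctl start prometheus.service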
[root@ubuntu2004 prometheus]#systemctl status prometheus.service
● prometheus.service - Monitoring system and time series database
Loaded: loaded (/lib/systemd/system/prometheus.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 10:01:05 CST; 3s ago
Docs: https://prometheus.io/docs/introduction/overview/
Main PID: 43386 (prometheus)
Tasks: 8 (limit: 2236)
Memory: 53.0M
CGroup: /system.slice/prometheus.service
└─43386 /usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/data --web.console.libraries=/usr/share/prometheus/console_libraries --web.enable-lifecycle
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.887Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=5 maxSegment=6
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.887Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=6 maxSegment=6
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.888Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=4.266669ms wal_replay_duration=104.568886ms wbl_replay_duration=248ns total_r>
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1017 level=info msg="TSDB started"
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.890Z caller=main.go:1197 level=info msg="Loading configuration file" filename=/usr/local/prometheus/prometheus.yml
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=/usr/local/prometheus/prometheus.yml totalDuration=2.076078ms db_storage=2.036µs remote_st>
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.892Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
Nov 24 10:01:05 ubuntu2004 prometheus[43386]: ts=2022-11-24T02:01:05.894Z caller=consul.go:293 level=error component="discovery manager scrape" discovery=consul msg="Error retrieving datacenter name" err="Get \"http://localhost:8500/v1/agent/self\": >
Adding node_exporter
[root@ubuntu2004 prometheus]#./prometheus --web.enable-lifecycle
ts=2022-11-22T02:34:50.388Z caller=main.go:512 level=info msg="No time or size retention was set so using the default time retention" duration=15d
ts=2022-11-22T02:34:50.388Z caller=main.go:556 level=info msg="Starting Prometheus Server" mode=server version="(version=2.40.2, branch=HEAD, revision=a07a94a5abb8a979d8aa87297f77f3979148b2da)"
ts=2022-11-22T02:34:50.388Z caller=main.go:561 level=info build_context="(go=go1.19.3, user=root@1b4b53e3f125, date=20221117-13:40:12)"
ts=2022-11-22T02:34:50.388Z caller=main.go:562 level=info host_details="(Linux 5.4.0-124-generic #140-Ubuntu SMP Thu Aug 4 02:23:37 UTC 2022 x86_64 ubuntu2004 (none))"
ts=2022-11-22T02:34:50.388Z caller=main.go:563 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-11-22T02:34:50.388Z caller=main.go:564 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-11-22T02:34:50.390Z caller=web.go:559 level=info component=web msg="Start listening for connections" address=0.0.0.0:9090
ts=2022-11-22T02:34:50.390Z caller=main.go:993 level=info msg="Starting TSDB ..."
ts=2022-11-22T02:34:50.395Z caller=tls_config.go:232 level=info component=web msg="Listening on" address=[::]:9090
ts=2022-11-22T02:34:50.395Z caller=tls_config.go:235 level=info component=web msg="TLS is disabled." http2=false address=[::]:9090
ts=2022-11-22T02:34:50.396Z caller=head.go:562 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-11-22T02:34:50.397Z caller=head.go:606 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=719.294µs
ts=2022-11-22T02:34:50.397Z caller=head.go:612 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-11-22T02:34:50.401Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=0 maxSegment=3
ts=2022-11-22T02:34:50.401Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=1 maxSegment=3
ts=2022-11-22T02:34:50.412Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=2 maxSegment=3
ts=2022-11-22T02:34:50.413Z caller=head.go:683 level=info component=tsdb msg="WAL segment loaded" segment=3 maxSegment=3
ts=2022-11-22T02:34:50.413Z caller=head.go:720 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=120.616µs wal_replay_duration=16.009663ms wbl_replay_duration=124ns total_replay_duration=16.941701ms
ts=2022-11-22T02:34:50.416Z caller=main.go:1014 level=info fs_type=EXT4_SUPER_MAGIC
ts=2022-11-22T02:34:50.416Z caller=main.go:1017 level=info msg="TSDB started"
ts=2022-11-22T02:34:50.417Z caller=main.go:1197 level=info msg="Loading configuration file" filename=prometheus.yml
ts=2022-11-22T02:34:50.418Z caller=main.go:1234 level=info msg="Completed loading of configuration file" filename=prometheus.yml totalDuration=1.060226ms db_storage=2.109µs remote_storage=2.185µs web_handler=590ns query_engine=995ns scrape=273.086µs scrape_sd=64.143µs notify=36.073µs notify_sd=24.71µs rules=2.353µs tracing=28.653µs
ts=2022-11-22T02:34:50.419Z caller=main.go:978 level=info msg="Server is ready to receive web requests."
ts=2022-11-22T02:34:50.419Z caller=manager.go:944 level=info component="rule manager" msg="Starting rule manager..."
....
[root@ubuntu2004 data]#ls
learning-prometheus node_exporter-1.4.0.linux-amd64.tar.gz prometheus-2.40.2.linux-amd64.tar.gz
[root@ubuntu2004 data]#tar xf node_exporter-1.4.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 data]#cd /usr/local/
[root@ubuntu2004 local]#ln -sv node_exporter-1.4.0.linux-amd64/ node_exporter
'node_exporter' -> 'node_exporter-1.4.0.linux-amd64/'
[root@ubuntu2004 local]#cd node_exporter
[root@ubuntu2004 node_exporter]#ls
LICENSE node_exporter NOTICE
[root@ubuntu2004 prometheus]#vim prometheus.yml
...
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    static_configs:
      - targets:
          - "10.0.0.210:9100"
          - "10.0.0.209:9100"
          - "10.0.0.208:9100"
[root@ubuntu2004 node_exporter]#./node_exporter --collector.ntp --collector.tcpstat --no-collector.zfs
ts=2022-11-22T02:29:14.368Z caller=node_exporter.go:182 level=info msg="Starting node_exporter" version="(version=1.4.0, branch=HEAD, revision=7da1321761b3b8dfc9e496e1a60e6a476fec6018)"
ts=2022-11-22T02:29:14.368Z caller=node_exporter.go:183 level=info msg="Build context" build_context="(go.....
.....
#Reload the configuration online; note the server must be running with the --web.enable-lifecycle flag for this to work
[root@ubuntu2004 ~]#curl -XPOST http://localhost:9090/-/reload
[root@ubuntu2004 ~]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=735,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=769,fd=3))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=1687,fd=3))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=769,fd=4))
LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=1326,fd=3))
#systemd service file
[root@ubuntu2004 prometheus]#vim /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Type=simple
User=prometheus
ExecStart=/usr/local/node_exporter/node_exporter \
--collector.ntp \
--collector.mountstats \
--collector.systemd \
--collector.ethtool \
--collector.tcpstat
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always
[Install]
WantedBy=multi-user.target
[root@ubuntu2004 prometheus]#chown -R prometheus. /usr/local/node_exporter
[root@ubuntu2004 prometheus]#systemctl daemon-reload
[root@ubuntu2004 prometheus]#systemctl start node_exporter.service
[root@ubuntu2004 prometheus]#systemctl status node_exporter.service
● node_exporter.service - node_exporter
Loaded: loaded (/lib/systemd/system/node_exporter.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 10:11:44 CST; 3s ago
Docs: https://prometheus.io/docs/introduction/overview/
Main PID: 44033 (node_exporter)
Tasks: 5 (limit: 2236)
Memory: 13.5M
CGroup: /system.slice/node_exporter.service
└─44033 /usr/local/node_exporter/node_exporter --collector.ntp --collector.mountstats --collector.systemd --collector.ethtool --collector.tcpstat
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=thermal_zone
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=time
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=timex
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=udp_queues
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=uname
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=vmstat
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=xfs
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:115 level=info collector=zfs
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.927Z caller=node_exporter.go:199 level=info msg="Listening on" address=:9100
Nov 24 10:11:44 ubuntu2004 node_exporter[44033]: ts=2022-11-24T02:11:44.928Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
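Optional spot check that the extra collectors are exposed (node_uname_info comes from the uname collector):
curl -s http://localhost:9100/metrics | grep -E '^node_(load1|uname_info)'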
Service discovery
File-based service discovery
[root@ubuntu2004 prometheus]#mkdir targets
[root@ubuntu2004 prometheus]#cd targets/
[root@ubuntu2004 targets]#vim nodes-linux.yml
- targets:
    - 10.0.0.208:9100
    - 10.0.0.209:9100
    - 10.0.0.210:9100
  labels:
    os: ubuntu
[root@ubuntu2004 targets]#cd ..
[root@ubuntu2004 prometheus]#vim prometheus.yml
....
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    file_sd_configs:
      - files:
          - targets/nodes-*.yml
        refresh_interval: 2m
[root@ubuntu2004 prometheus]#curl -XPOST http://localhost:9090/-/reload
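Note that file_sd watches the matched files, so later edits to targets/nodes-*.yml take effect without another reload; the reload above is only needed for the prometheus.yml change itself. The discovered targets can be inspected through the API (assuming jq is installed):
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, instance: .labels.instance, health: .health}'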
Consul service discovery
[root@ubuntu2004 data]#unzip consul_1.14.1_linux_amd64.zip -d /usr/local/bin/
Archive: consul_1.14.1_linux_amd64.zip
inflating: /usr/local/bin/consul
[root@ubuntu2004 data]#mkdir -pv /consul/data/
mkdir: created directory '/consul'
mkdir: created directory '/consul/data/'
[root@ubuntu2004 data]#mkdir /etc/console/
[root@ubuntu2004 data]#consul agent -dev -ui -data-dir=/consul/data/ -config-dir=/etc/console/ -client=0.0.0.0
==> Starting Consul agent...
Version: '1.14.1'
Build Date: '2022-11-21 16:56:07 +0000 UTC'
Node ID: '499b3393-c5f2-cc1b-face-31e46352e0ce'
Node name: 'ubuntu2004'
Datacenter: 'dc1' (Segment: '<all>')
Server: true (Bootstrap: false)
Client Addr: [0.0.0.0] (HTTP: 8500, HTTPS: -1, gRPC: 8502, gRPC-TLS: 8503, DNS: 8600)
Cluster Addr: 127.0.0.1 (LAN: 8301, WAN: 8302)
Gossip Encryption: false
Auto-Encrypt-TLS: false
HTTPS TLS: Verify Incoming: false, Verify Outgoing: false, Min Version: TLSv1_2
gRPC TLS: Verify Incoming: false, Min Version: TLSv1_2
Internal RPC TLS: Verify Incoming: false, Verify Outgoing: false (Verify Hostname: false), Min Version: TLSv1_2
==> Log data will now stream in as it occurs:
....
[root@ubuntu2004 ~]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.1:8300 0.0.0.0:* users:(("consul",pid=3153,fd=6))
LISTEN 0 4096 127.0.0.1:8301 0.0.0.0:* users:(("consul",pid=3153,fd=9))
LISTEN 0 4096 127.0.0.1:8302 0.0.0.0:* users:(("consul",pid=3153,fd=7))
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=735,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=769,fd=3))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=1687,fd=3))
LISTEN 0 4096 *:8500 *:* users:(("consul",pid=3153,fd=17))
LISTEN 0 4096 *:8502 *:* users:(("consul",pid=3153,fd=18))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=769,fd=4))
LISTEN 0 4096 *:8503 *:* users:(("consul",pid=3153,fd=19))
LISTEN 0 4096 *:8600 *:* users:(("consul",pid=3153,fd=16))
LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=1945,fd=7))
#systemd service file
[root@ubuntu2004 prometheus]#vim /usr/lib/systemd/system/consul.service
[Unit]
Description="HashiCorp Consul - A service mesh solution"
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target
[Service]
EnvironmentFile=-/etc/consul.d/consul.env
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -dev -bootstrap \
-config-dir /usr/local/consul/config \
-data-dir /usr/local/consul/data \
-ui \
-log-level INFO \
-bind 127.0.0.1 \
-client 0.0.0.0
ExecReload=/bin/kill --signal HUP $MAINPID
KillMode=process
KillSignal=SIGTERM
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
[root@ubuntu2004 prometheus]#useradd -s /sbin/nologin consul
[root@ubuntu2004 prometheus]#systemctl daemon-reload
[root@ubuntu2004 prometheus]#mkdir /usr/local/consul/config -p
[root@ubuntu2004 prometheus]#mkdir /usr/local/consul/data
[root@ubuntu2004 prometheus]#chown -R consul. /usr/local/consul/
[root@ubuntu2004 prometheus]#mv /usr/local/bin/consul /usr/bin/
[root@ubuntu2004 prometheus]#systemctl start consul.service
[root@ubuntu2004 prometheus]#systemctl status consul.service
● consul.service - "HashiCorp Consul - A service mesh solution"
Loaded: loaded (/lib/systemd/system/consul.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 10:08:05 CST; 1s ago
Docs: https://www.consul.io/
Main PID: 43826 (consul)
Tasks: 8 (limit: 2236)
Memory: 89.5M
CGroup: /system.slice/consul.service
└─43826 /usr/bin/consul agent -dev -bootstrap -config-dir /usr/local/consul/config -data-dir /usr/local/consul/data -ui -log-level INFO -bind 127.0.0.1 -client 0.0.0.0
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.735+0800 [INFO] agent.leader: started routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.736+0800 [INFO] agent.server: member joined, marking health alive: member=ubuntu2004 partition=default
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.738+0800 [INFO] agent.leader: stopping routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.738+0800 [INFO] agent.leader: stopped routine: routine="virtual IP version check"
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO] agent: Synced node info
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO] agent: Synced service: service=node_exporter-node03
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.928+0800 [INFO] agent: Synced service: service=node_exporter-node01
Nov 24 10:08:05 ubuntu2004 consul[43826]: 2022-11-24T10:08:05.929+0800 [INFO] agent: Synced service: service=node_exporter-node02
Nov 24 10:08:06 ubuntu2004 consul[43826]: 2022-11-24T10:08:06.021+0800 [INFO] agent.server: federation state anti-entropy synced
Nov 24 10:08:06 ubuntu2004 consul[43826]: 2022-11-24T10:08:06.149+0800 [WARN] agent: Check is now critical: check=service:node_exporter-node02
[root@ubuntu2004 data]#vim /etc/console/node.json
{
  "services": [
    {
      "id": "node_exporter-node01",
      "name": "node01",
      "address": "10.0.0.210",
      "port": 9100,
      "tags": ["nodes"],
      "checks": [{
        "http": "http://10.0.0.210:9100/metrics",
        "interval": "5s"
      }]
    },
    {
      "id": "node_exporter-node02",
      "name": "node02",
      "address": "10.0.0.209",
      "port": 9100,
      "tags": ["nodes"],
      "checks": [{
        "http": "http://10.0.0.209:9100/metrics",
        "interval": "5s"
      }]
    },
    {
      "id": "node_exporter-node03",
      "name": "node03",
      "address": "10.0.0.208",
      "port": 9100,
      "tags": ["nodes"],
      "checks": [{
        "http": "http://10.0.0.208:9100/metrics",
        "interval": "5s"
      }]
    }
  ]
}
#Restart consul to load the new service definitions
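A full restart works; alternatively, since the definitions live in the config directory, a reload is usually enough:
consul reload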
[root@ubuntu2004 data]#vim /usr/local/prometheus/prometheus.yml
  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nodes"
        refresh_interval: 2m
[root@ubuntu2004 data]#curl -XPOST http://localhost:9090/-/reload
Deploying consul_exporter
[root@ubuntu2004 data]#tar -xf consul_exporter-0.8.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 data]#cd /usr/local/
[root@ubuntu2004 local]#
[root@ubuntu2004 local]#ls
bin consul consul_exporter-0.8.0.linux-amd64 docker etc games include lib man node_exporter node_exporter-1.4.0.linux-amd64 prometheus prometheus-2.40.2.linux-amd64 sbin share src
[root@ubuntu2004 local]#ln -sv consul_exporter-0.8.0.linux-amd64/ consul_exporter
'consul_exporter' -> 'consul_exporter-0.8.0.linux-amd64/'
[root@ubuntu2004 local]#ll
total 60
drwxr-xr-x 15 root root 4096 Nov 24 10:27 ./
drwxr-xr-x 14 root root 4096 Feb 23 2022 ../
drwxr-xr-x 3 root root 4096 Nov 24 10:08 bin/
drwxr-xr-x 4 consul consul 4096 Nov 24 10:06 consul/
lrwxrwxrwx 1 root root 34 Nov 24 10:27 consul_exporter -> consul_exporter-0.8.0.linux-amd64//
drwxr-xr-x 2 3434 3434 4096 Feb 11 2022 consul_exporter-0.8.0.linux-amd64/
drwxrwxr-x 2 wang wang 4096 Oct 14 00:50 docker/
drwxr-xr-x 2 root root 4096 Feb 23 2022 etc/
drwxr-xr-x 2 root root 4096 Feb 23 2022 games/
drwxr-xr-x 2 root root 4096 Feb 23 2022 include/
drwxr-xr-x 4 root root 4096 Nov 22 14:25 lib/
lrwxrwxrwx 1 root root 9 Feb 23 2022 man -> share/man/
lrwxrwxrwx 1 prometheus prometheus 32 Nov 22 10:25 node_exporter -> node_exporter-1.4.0.linux-amd64//
drwxr-xr-x 2 3434 3434 4096 Sep 26 20:39 node_exporter-1.4.0.linux-amd64/
lrwxrwxrwx 1 prometheus prometheus 29 Nov 22 09:45 prometheus -> prometheus-2.40.2.linux-amd64/
drwxr-xr-x 6 prometheus prometheus 4096 Nov 24 10:01 prometheus-2.40.2.linux-amd64/
drwxr-xr-x 2 root root 4096 Feb 23 2022 sbin/
drwxr-xr-x 4 root root 4096 Feb 23 2022 share/
drwxr-xr-x 2 root root 4096 Feb 23 2022 src/
[root@ubuntu2004 local]#chown -R consul. consul_exporter
#systemd service file
[root@ubuntu2004 local]#vim /usr/lib/systemd/system/consul_exporter.service
[Unit]
Description=consul_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Type=simple
User=consul
EnvironmentFile=-/etc/default/consul_exporter
# In practice, when consul_exporter and the consul server run on different hosts, --consul.server must point to the actual server address;
ExecStart=/usr/local/consul_exporter/consul_exporter \
--consul.server="http://localhost:8500" \
--web.listen-address=":9107" \
--web.telemetry-path="/metrics" \
--log.level=info \
$ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always
[Install]
WantedBy=multi-user.target
[root@ubuntu2004 local]#systemctl daemon-reload
[root@ubuntu2004 local]#systemctl start consul_exporter.service
[root@ubuntu2004 local]#systemctl status consul_exporter.service
● consul_exporter.service - consul_exporter
Loaded: loaded (/lib/systemd/system/consul_exporter.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 10:30:22 CST; 10s ago
Docs: https://prometheus.io/docs/introduction/overview/
Main PID: 44688 (consul_exporter)
Tasks: 4 (limit: 2236)
Memory: 2.0M
CGroup: /system.slice/consul_exporter.service
└─44688 /usr/local/consul_exporter/consul_exporter --consul.server=http://localhost:8500 --web.listen-address=:9107 --web.telemetry-path=/metrics --log.level=info
Nov 24 10:30:22 ubuntu2004 systemd[1]: Started consul_exporter.
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.209Z caller=consul_exporter.go:80 level=info msg="Starting consul_exporter" version="(version=0.8.0, branch=HEAD, revision=176aef0f2d437e9fd1cb3a9e29dc4730de717e05)"
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.210Z caller=consul_exporter.go:81 level=info build_context="(go=go1.17.6, user=root@566e953b1722, date=20220210-16:54:21)"
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.210Z caller=consul_exporter.go:132 level=info msg="Listening on address" address=:9107
Nov 24 10:30:22 ubuntu2004 consul_exporter[44688]: ts=2022-11-24T02:30:22.211Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
[root@ubuntu2004 local]#ss -ntl
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.1:8300 0.0.0.0:*
LISTEN 0 4096 127.0.0.1:8301 0.0.0.0:*
LISTEN 0 4096 127.0.0.1:8302 0.0.0.0:*
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:*
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 4096 *:9090 *:*
LISTEN 0 4096 *:9100 *:*
LISTEN 0 4096 *:9107 *:*
LISTEN 0 4096 *:8500 *:*
LISTEN 0 4096 *:8502 *:*
LISTEN 0 128 [::]:22 [::]:*
LISTEN 0 4096 *:8503 *:*
LISTEN 0 4096 *:8600 *:*
[root@ubuntu2004 config]#cat consul_exporter.json
{
  "id": "consul_exporter",
  "name": "consul_exporter",
  "address": "10.0.0.210",
  "port": 9107,
  "tags": ["consul_exporter"],
  "checks": [{
    "http": "http://10.0.0.210:9107/metrics",
    "interval": "5s"
  }]
}
[root@ubuntu2004 config]#curl -XPUT --data @consul_exporter.json localhost:8500/v1/agent/service/register
[root@ubuntu2004 config]#vim /usr/local/prometheus/prometheus.yml
...
  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "consul_exporter"
        refresh_interval: 1m
[root@ubuntu2004 config]#curl -XPOST http://localhost:9090/-/reload
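Optional spot check: consul_up from the exporter's own metrics should read 1 when it can reach the consul server:
curl -s http://localhost:9107/metrics | grep '^consul_up'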
Deploying mysqld_exporter
[root@ubuntu2004 local]#tar xf mysqld_exporter-0.14.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 local]#ln -sv /usr/local/mysqld_exporter-0.14.0.linux-amd64 /usr/local/mysqld_exporter
'/usr/local/mysqld_exporter' -> '/usr/local/mysqld_exporter-0.14.0.linux-amd64'
[root@ubuntu2004 local]#vim /usr/lib/systemd/system/mysqld_exporter.service
[Unit]
Description=mysqld_exporter
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Type=simple
User=mysql
EnvironmentFile=-/etc/default/mysqld_exporter
# In practice, when mysqld_exporter and the MySQL server run on different hosts, the DSN must point to the actual server address;
# mysqld_exporter connects to MySQL with username and password 'exporter'; this user must be granted the privileges shown below;
Environment='DATA_SOURCE_NAME=exporter:exporter@(localhost:3306)/'
ExecStart=/usr/local/mysqld_exporter/mysqld_exporter \
--web.listen-address=":9104" \
--web.telemetry-path="/metrics" \
--collect.info_schema.innodb_tablespaces \
--collect.info_schema.innodb_metrics \
--collect.global_status \
--collect.global_variables \
--collect.slave_status \
--collect.engine_innodb_status \
$ARGS
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always
[Install]
WantedBy=multi-user.target
[root@ubuntu2004 local]#chown -R mysql. /usr/local/mysqld_exporter
[root@ubuntu2004 local]#mysql
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 9
Server version: 8.0.31-0ubuntu0.20.04.2 (Ubuntu)
Copyright (c) 2000, 2022, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'exporter';
Query OK, 0 rows affected (0.02 sec)
mysql> GRANT PROCESS, REPLICATION CLIENT ON *.* TO 'exporter'@'localhost';
Query OK, 0 rows affected (0.01 sec)
mysql> GRANT SELECT ON performance_schema.* TO 'exporter'@'localhost';
Query OK, 0 rows affected (0.00 sec)
mysql> FLUSH PRIVILEGES;
Query OK, 0 rows affected (0.01 sec)
mysql> exit
Bye
[root@ubuntu2004 local]#systemctl daemon-reload
[root@ubuntu2004 local]#systemctl start mysqld_exporter.service
[root@ubuntu2004 local]#systemctl status mysqld_exporter.service
● mysqld_exporter.service - mysqld_exporter
Loaded: loaded (/lib/systemd/system/mysqld_exporter.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 10:55:13 CST; 4s ago
Docs: https://prometheus.io/docs/introduction/overview/
Main PID: 46014 (mysqld_exporter)
Tasks: 5 (limit: 2236)
Memory: 1.9M
CGroup: /system.slice/mysqld_exporter.service
└─46014 /usr/local/mysqld_exporter/mysqld_exporter --web.listen-address=:9104 --web.telemetry-path=/metrics --collect.info_schema.innodb_tablespaces --collect.info_schema.innodb_metrics --collect.global_status --collect.global_variables ->
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_tablespaces
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_metrics
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=global_status
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.867Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=global_variables
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.868Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_cmp
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.870Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.innodb_cmpmem
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=info_schema.query_response_time
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:293 level=info msg="Scraper enabled" scraper=engine_innodb_status
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.871Z caller=mysqld_exporter.go:303 level=info msg="Listening on address" address=:9104
Nov 24 10:55:13 ubuntu2004 mysqld_exporter[46014]: ts=2022-11-24T02:55:13.872Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
[root@ubuntu2004 local]#ss -ntl
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 70 127.0.0.1:33060 0.0.0.0:*
LISTEN 0 151 127.0.0.1:3306 0.0.0.0:*
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:*
LISTEN 0 128 0.0.0.0:22 0.0.0.0:*
LISTEN 0 4096 *:9100 *:*
LISTEN 0 4096 *:9104 *:*
LISTEN 0 128 [::]:22 [::]:*
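Whether the DSN and grants actually work only shows up at scrape time; mysql_up should read 1 (optional spot check):
curl -s http://localhost:9104/metrics | grep '^mysql_up'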
#Register it in consul so it is picked up by service discovery
[root@ubuntu2004 config]#vim mysqld_exporter.json
{
  "id": "mysqld_exporter",
  "name": "mysqld_exporter",
  "address": "10.0.0.209",
  "port": 9104,
  "tags": ["mysqld_exporter"],
  "checks": [{
    "http": "http://10.0.0.209:9104/metrics",
    "interval": "5s"
  }]
}
[root@ubuntu2004 config]#curl -XPUT --data @mysqld_exporter.json localhost:8500/v1/agent/service/register
[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "mysqld_exporter"
        refresh_interval: 1m
[root@ubuntu2004 prometheus]#./promtool check config ./prometheus.yml
Checking ./prometheus.yml
SUCCESS: ./prometheus.yml is valid prometheus config file syntax
[root@ubuntu2004 prometheus]#curl -XPOST http://localhost:9090/-/reload
Deploying nginx_exporter
#Run nginx and nginx_exporter as containers
[root@ubuntu2004 nginx-and-exporter]#docker-compose up -d
Creating network "nginx-and-exporter_monitoring" with driver "bridge"
Creating nginx-and-exporter_nginx_1 ... done
Creating nginx-and-exporter_nginx-exporter_1 ... done
[root@ubuntu2004 nginx-and-exporter]#tree .
.
├── docker-compose.yml
└── nginx
└── stub_status-server.conf
[root@ubuntu2004 nginx-and-exporter]#cat docker-compose.yml
version: '3.6'

networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.107.0/24

services:
  nginx:
    image: nginx:1.22.1
    volumes:
      - ./nginx/stub_status-server.conf:/etc/nginx/conf.d/stub_status-server.conf:ro
    networks:
      - monitoring
    expose:
      - 8080
      - 80
    ports:
      - 80:80

  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:0.11
    command:
      - '-nginx.scrape-uri=http://nginx:8080/stub_status'
    networks:
      - monitoring
    ports:
      - '9113:9113'
    depends_on:
      - nginx
[root@ubuntu2004 nginx-and-exporter]#cat nginx/stub_status-server.conf
server {
    listen 8080;
    server_name localhost;

    location /stub_status {
        stub_status;
        access_log off;
        #allow 172.31.0.0/16;
        #deny all;
    }
}
[root@ubuntu2004 nginx-and-exporter]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 0.0.0.0:80 0.0.0.0:* users:(("docker-proxy",pid=52611,fd=4))
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=734,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=768,fd=3))
LISTEN 0 4096 0.0.0.0:9113 0.0.0.0:* users:(("docker-proxy",pid=52744,fd=4))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=43836,fd=7))
LISTEN 0 4096 [::]:80 [::]:* users:(("docker-proxy",pid=52618,fd=4))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=768,fd=4))
LISTEN 0 4096 [::]:9113 [::]:* users:(("docker-proxy",pid=52751,fd=4))
[root@ubuntu2004 nginx-and-exporter]#curl 10.0.0.208:9113/metrics
# HELP nginx_connections_accepted Accepted client connections
# TYPE nginx_connections_accepted counter
nginx_connections_accepted 1
# HELP nginx_connections_active Active client connections
# TYPE nginx_connections_active gauge
nginx_connections_active 1
# HELP nginx_connections_handled Handled client connections
# TYPE nginx_connections_handled counter
nginx_connections_handled 1
# HELP nginx_connections_reading Connections where NGINX is reading the request header
# TYPE nginx_connections_reading gauge
nginx_connections_reading 0
# HELP nginx_connections_waiting Idle client connections
# TYPE nginx_connections_waiting gauge
nginx_connections_waiting 0
# HELP nginx_connections_writing Connections where NGINX is writing the response back to the client
# TYPE nginx_connections_writing gauge
nginx_connections_writing 1
# HELP nginx_http_requests_total Total http requests
# TYPE nginx_http_requests_total counter
nginx_http_requests_total 2
# HELP nginx_up Status of the last metric scrape
# TYPE nginx_up gauge
nginx_up 1
# HELP nginxexporter_build_info Exporter build information
# TYPE nginxexporter_build_info gauge
nginxexporter_build_info{arch="linux/amd64",commit="e4a6810d4f0b776f7fde37fea1d84e4c7284b72a",date="2022-09-07T21:09:51Z",dirty="false",go="go1.19",version="0.11.0"} 1
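With these counters scraped, the per-second request rate can be derived in PromQL, e.g. (illustrative query):
rate(nginx_http_requests_total[5m])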
#Service registration and service discovery
[root@ubuntu2004 config]#vim nginx_exporter.json
{
  "id": "nginx_exporter",
  "name": "nginx_exporter",
  "address": "10.0.0.208",
  "port": 9113,
  "tags": ["nginx_exporter"],
  "checks": [{
    "http": "http://10.0.0.208:9113/metrics",
    "interval": "5s"
  }]
}
[root@ubuntu2004 config]#curl -XPUT --data @nginx_exporter.json localhost:8500/v1/agent/service/register
[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nginx_exporter"
        refresh_interval: 1m
"prometheus/prometheus.yml" 66L, 1725C 已写入
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
Deploying blackbox-exporter
[root@ubuntu2004 blackbox-exporter]#cat docker-compose.yml
version: '3.6'

networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.136.0/24

services:
  blackbox_exporter:
    image: prom/blackbox-exporter:v0.22.0
    volumes:
      - ./blackboxexporter/:/etc/blackboxexporter/
    command:
      - '--config.file=/etc/blackboxexporter/config.yml'
    networks:
      - monitoring
    ports:
      - 9115:9115
[root@ubuntu2004 blackbox-exporter]#cat blackboxexporter/config.yml
modules:
  # https://github.com/prometheus/blackbox_exporter/blob/master/example.yml
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions:
        - "HTTP/1.1"
        - "HTTP/2"
      valid_status_codes: [] # Defaults to 2xx
      enable_http2: false
      method: GET
      no_follow_redirects: false
      # With fail_if_ssl set to true, the probe fails if the site uses SSL and succeeds otherwise;
      # fail_if_not_ssl is the exact opposite.
      fail_if_ssl: false
      fail_if_not_ssl: false
      # fail_if_body_matches_regexp, fail_if_body_not_matches_regexp, fail_if_header_matches, fail_if_header_not_matches
      # define sets of regular expressions used to verify whether the HTTP response matches (or does not match) them
      fail_if_body_matches_regexp:
        - "Could not connect to database"
      tls_config:
        insecure_skip_verify: false
      preferred_ip_protocol: "ip4" # defaults to "ip6"
[root@ubuntu2004 blackbox-exporter]#docker-compose up -d
Creating network "blackbox-exporter_monitoring" with driver "bridge"
Pulling blackbox_exporter (prom/blackbox-exporter:v0.22.0)...
v0.22.0: Pulling from prom/blackbox-exporter
19d511225f94: Pull complete
f8b4a0d0d975: Pull complete
73c8559532e0: Pull complete
c1abff7c7d36: Pull complete
Digest: sha256:608acee5704ad49c3308b900230dfc00b25da0c90425f8fed55cf005e07f521b
Status: Downloaded newer image for prom/blackbox-exporter:v0.22.0
Creating blackbox-exporter_blackbox_exporter_1 ... done
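The exporter can be probed by hand before wiring it into Prometheus; /probe takes the module and target as query parameters (the target here is illustrative):
curl 'http://10.0.0.208:9115/probe?module=http_2xx&target=www.baidu.com'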
[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nodes"
        refresh_interval: 1m

  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "consul_exporter"
        refresh_interval: 1m

  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "mysqld_exporter"
        refresh_interval: 1m

  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nginx_exporter"
        refresh_interval: 1m

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx] # Look for a HTTP 200 response.
    static_configs:
      - targets:
          - "www.baidu.com"
          - "www.google.com"
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "10.0.0.208:9115" # Point to the actual blackbox exporter.
      - target_label: region
        replacement: "remote"
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
Persisting queries with recording rules
[root@ubuntu2004 prometheus]#cat rules/record-rules-node.yml
groups:
  - name: custom_rules
    interval: 5s
    rules:
      - record: instance:node_cpu:avg_rate5m
        expr: (1 - avg(irate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[5m])) by (instance)) * 100
      - record: instance:node_memory_MemFree_percent
        expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes
      - record: instance:root:node_filesystem_free_percent
        expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}
[root@ubuntu2004 prometheus]#cat rules/record-rules-mysqld.yml
groups:
  - name: mysqld_rules
    rules:
      # Record slave lag seconds as a pre-computed timeseries that takes
      # mysql_slave_status_sql_delay into account
      - record: instance:mysql_slave_lag_seconds
        expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay
      # Record slave lag via the heartbeat method
      - record: instance:mysql_heartbeat_lag_seconds
        expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds
      - record: job:mysql_transactions:rate5m
        expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
[root@ubuntu2004 prometheus]#ls rules/
record-rules-mysqld.yml record-rules-node.yml
[root@ubuntu2004 prometheus]#vim prometheus.yml
...
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - rules/record-rules-*.yml
...
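promtool can validate the rule files before the reload (run from /usr/local/prometheus):
./promtool check rules rules/record-rules-node.yml rules/record-rules-mysqld.yml
After the reload, the recorded series such as instance:node_cpu:avg_rate5m become queryable like any other metric.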
Deploying Grafana
https://grafana.com/grafana/download
[root@ubuntu2004 ~]#apt-get install -y adduser libfontconfig1
[root@ubuntu2004 ~]#wget https://dl.grafana.com/enterprise/release/grafana-enterprise_9.2.6_amd64.deb
[root@ubuntu2004 ~]#dpkg -i grafana-enterprise_9.2.6_amd64.deb
Selecting previously unselected package grafana-enterprise.
(Reading database ... 114765 files and directories currently installed.)
Preparing to unpack grafana-enterprise_9.2.6_amd64.deb ...
Unpacking grafana-enterprise (9.2.6) ...
Setting up grafana-enterprise (9.2.6) ...
Adding system user "grafana" (UID 113) ...
Adding new user "grafana" (UID 113) to group "grafana" ...
Could not create home directory "/usr/share/grafana".
### NOT starting on installation, please execute the following statements to configure grafana to start automatically using systemd
sudo /bin/systemctl daemon-reload
sudo /bin/systemctl enable grafana-server
### You can start grafana-server by executing
sudo /bin/systemctl start grafana-server
Processing triggers for systemd (245.4-4ubuntu3.15) ...
[root@ubuntu2004 ~]#systemctl enable --now grafana-server.service
Synchronizing state of grafana-server.service with SysV service script with /lib/systemd/systemd-sysv-install.
Executing: /lib/systemd/systemd-sysv-install enable grafana-server
Created symlink /etc/systemd/system/multi-user.target.wants/grafana-server.service → /lib/systemd/system/grafana-server.service.
[root@ubuntu2004 ~]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.1:8300 0.0.0.0:* users:(("consul",pid=43826,fd=6))
LISTEN 0 4096 127.0.0.1:8301 0.0.0.0:* users:(("consul",pid=43826,fd=9))
LISTEN 0 4096 127.0.0.1:8302 0.0.0.0:* users:(("consul",pid=43826,fd=7))
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=740,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=794,fd=3))
LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=43386,fd=7))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=44033,fd=7))
LISTEN 0 4096 *:9107 *:* users:(("consul_exporter",pid=44688,fd=3))
LISTEN 0 4096 *:8500 *:* users:(("consul",pid=43826,fd=17))
LISTEN 0 4096 *:8502 *:* users:(("consul",pid=43826,fd=18))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=794,fd=4))
LISTEN 0 4096 *:8503 *:* users:(("consul",pid=43826,fd=19))
LISTEN 0 4096 *:3000 *:* users:(("grafana-server",pid=70188,fd=8))
LISTEN 0 4096 *:8600 *:* users:(("consul",pid=43826,fd=16))
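Prometheus can also be added as a Grafana data source through the HTTP API instead of the UI; a minimal sketch, assuming the default admin:admin credentials are still in place:
curl -u admin:admin -H 'Content-Type: application/json' -X POST http://localhost:3000/api/datasources -d '{"name":"Prometheus","type":"prometheus","url":"http://10.0.0.210:9090","access":"proxy"}'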
[root@ubuntu2004 config]#vim grafana.json
{
  "id": "grafana",
  "name": "grafana",
  "address": "10.0.0.210",
  "port": 3000,
  "tags": ["grafana"],
  "checks": [{
    "http": "http://10.0.0.210:3000/metrics",
    "interval": "5s"
  }]
}
[root@ubuntu2004 config]#curl -XPUT --data @grafana.json http://localhost:8500/v1/agent/service/register
[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
  - job_name: "grafana"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "grafana"
        refresh_interval: 1m
"prometheus/prometheus.yml" 98L, 2540C 已写入
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
Deploying Alertmanager to implement alerting
[root@ubuntu2004 local]#curl -LO https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0
100 24.6M 100 24.6M 0 0 991k 0 0:00:25 0:00:25 --:--:-- 688k
[root@ubuntu2004 local]#tar xf alertmanager-0.24.0.linux-amd64.tar.gz -C /usr/local/
[root@ubuntu2004 local]#ln -sv /usr/local/alertmanager-0.24.0.linux-amd64 /usr/local/alertmanager
'/usr/local/alertmanager' -> '/usr/local/alertmanager-0.24.0.linux-amd64'
[root@ubuntu2004 local]#mkdir /usr/local/alertmanager/data
[root@ubuntu2004 local]#chown -R prometheus.prometheus /usr/local/alertmanager/data
[root@ubuntu2004 local]#vim /usr/lib/systemd/system/alertmanager.service
[Unit]
Description=alertmanager
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Type=simple
User=prometheus
ExecStart=/usr/local/alertmanager/alertmanager \
--config.file="/usr/local/alertmanager/alertmanager.yml" \
--storage.path="/usr/local/alertmanager/data/" \
--data.retention=120h \
--log.level=info
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
Restart=always
[Install]
WantedBy=multi-user.target
[root@ubuntu2004 local]#systemctl daemon-reload
[root@ubuntu2004 local]#systemctl start alertmanager.service
[root@ubuntu2004 local]#systemctl status alertmanager.service
● alertmanager.service - alertmanager
Loaded: loaded (/lib/systemd/system/alertmanager.service; disabled; vendor preset: enabled)
Active: active (running) since Thu 2022-11-24 14:41:18 CST; 4s ago
Docs: https://prometheus.io/docs/introduction/overview/
Main PID: 53363 (alertmanager)
Tasks: 8 (limit: 2236)
Memory: 13.2M
CGroup: /system.slice/alertmanager.service
└─53363 /usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml --storage.path=/usr/local/alertmanager/data/ --data.retention=120h --log.level=info
Nov 24 14:41:18 ubuntu2004 systemd[1]: Started alertmanager.
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.169Z caller=main.go:231 level=info msg="Starting Alertmanager" version="(version=0.24.0, branch=HEAD, revision=f484b17fa3c583ed1b2c8bbcec20ba1db2aa5f11)"
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.169Z caller=main.go:232 level=info build_context="(go=go1.17.8, user=root@265f14f5c6fc, date=20220325-09:31:33)"
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.181Z caller=cluster.go:185 level=info component=cluster msg="setting advertise address explicitly" addr=10.0.0.210 port=9094
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.182Z caller=cluster.go:680 level=info component=cluster msg="Waiting for gossip to settle..." interval=2s
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.216Z caller=coordinator.go:113 level=info component=configuration msg="Loading configuration file" file=/usr/local/alertmanager/alertmanager.yml
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.216Z caller=coordinator.go:126 level=info component=configuration msg="Completed loading of configuration file" file=/usr/local/alertmanager/alertmanager.yml
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.218Z caller=main.go:535 level=info msg=Listening address=:9093
Nov 24 14:41:18 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:18.218Z caller=tls_config.go:195 level=info msg="TLS is disabled." http2=false
Nov 24 14:41:20 ubuntu2004 alertmanager[53363]: ts=2022-11-24T06:41:20.184Z caller=cluster.go:705 level=info component=cluster msg="gossip not settled" polls=0 before=0 now=1 elapsed=2.000892394s
[root@ubuntu2004 local]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 127.0.0.1:8300 0.0.0.0:* users:(("consul",pid=43826,fd=6))
LISTEN 0 4096 127.0.0.1:8301 0.0.0.0:* users:(("consul",pid=43826,fd=9))
LISTEN 0 4096 127.0.0.1:8302 0.0.0.0:* users:(("consul",pid=43826,fd=7))
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=740,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=794,fd=3))
LISTEN 0 4096 *:9090 *:* users:(("prometheus",pid=43386,fd=7))
LISTEN 0 4096 *:9093 *:* users:(("alertmanager",pid=53363,fd=8))
LISTEN 0 4096 *:9094 *:* users:(("alertmanager",pid=53363,fd=3))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=44033,fd=7))
LISTEN 0 4096 *:9107 *:* users:(("consul_exporter",pid=44688,fd=3))
LISTEN 0 4096 *:8500 *:* users:(("consul",pid=43826,fd=17))
LISTEN 0 4096 *:8502 *:* users:(("consul",pid=43826,fd=18))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=794,fd=4))
LISTEN 0 4096 *:8503 *:* users:(("consul",pid=43826,fd=19))
LISTEN 0 4096 *:8600 *:* users:(("consul",pid=43826,fd=16))
Alerting via email or WeChat Work
[root@ubuntu2004 alertmanager]#cat alertmanager.yml
global:
  resolve_timeout: 1m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '985347841@qq.com'
  smtp_auth_username: '985347841@qq.com'
  smtp_auth_password: 'iovshgiwohaambdfg'
  smtp_hello: '@qq.com'
  smtp_require_tls: false

route:
  group_by: ['group', 'job', 'alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 10m
  receiver: email
  #receiver: wechat

templates:
  - '/etc/alertmanager/email_template.tmpl'

# Define the receivers
receivers:
  - name: 'email'
    email_configs:
      - to: '2508208842@qq.com'
        headers:
          subject: "{{ .Status | toUpper }} {{ .CommonLabels.env }}:{{ .CommonLabels.cluster }} {{ .CommonLabels.alertname }}"
        html: '{{ template "email.to.html" . }}'
        send_resolved: true
  #- name: 'wechat'
  #  wechat_configs:
  #    - corp_id: ww4c893118fbf4d07c
  #      to_user: '@all'
  #      agent_id: 1000008
  #      api_secret: WTepmmaqxbBOeTQOuxa0Olzov_hSEWsZWrPX1k6opMk
  #      send_resolved: true

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
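amtool, shipped in the same tarball, can validate this file before a restart (it also resolves the templates entry, so the referenced template file must exist at that path):
/usr/local/alertmanager/amtool check-config /usr/local/alertmanager/alertmanager.yml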
[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.0.0.210:9093
          # - alertmanager:9093
[root@ubuntu2004 local]#cat prometheus/rules/alert-rules-blackbox-exporter.yml
groups:
  - name: blackbox
    rules:
      # Blackbox probe failed
      - alert: BlackboxProbeFailed
        expr: probe_success == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Blackbox probe failed (instance {{ $labels.instance }})
          description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Blackbox slow probe
      - alert: BlackboxSlowProbe
        expr: avg_over_time(probe_duration_seconds[1m]) > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Blackbox slow probe (instance {{ $labels.instance }})
          description: "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Blackbox probe HTTP failure
      - alert: BlackboxProbeHttpFailure
        expr: probe_http_status_code <= 199 or probe_http_status_code >= 400
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Blackbox probe HTTP failure (instance {{ $labels.instance }})
          description: "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Blackbox probe slow HTTP
      - alert: BlackboxProbeSlowHttp
        expr: avg_over_time(probe_http_duration_seconds[1m]) > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
          description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Blackbox probe slow ping
      - alert: BlackboxProbeSlowPing
        expr: avg_over_time(probe_icmp_duration_seconds[1m]) > 1
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Blackbox probe slow ping (instance {{ $labels.instance }})
          description: "Blackbox ping took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
  - rules/record-rules-*.yml
  - rules/alert-rules-*.yml

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nodes"
        refresh_interval: 1m

  - job_name: "consul_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "consul_exporter"
        refresh_interval: 1m

  - job_name: "mysqld_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "mysqld_exporter"
        refresh_interval: 1m

  - job_name: "nginx_exporter"
    metrics_path: '/metrics'
    scheme: 'http'
    consul_sd_configs:
      - server: "localhost:8500"
        tags:
          - "nginx_exporter"
        refresh_interval: 1m

  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx] # Look for a HTTP 200 response.
    static_configs:
      - targets:
          - "www.baidu.com"
          - "www.google.com"
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: "10.0.0.208:9115" # Point to the actual blackbox exporter.
      - target_label: region
        replacement: "remote"
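The notification pipeline can be exercised without waiting for a real failure by posting a synthetic alert to Alertmanager's v2 API (the alert name and labels below are made up for the test):
curl -XPOST http://10.0.0.210:9093/api/v2/alerts -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"TestAlert","severity":"warning","instance":"manual-test"},"annotations":{"summary":"manual test alert"}}]'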
DingTalk alerting
[root@ubuntu2004 alertmanager-and-dingtalk]#tree .
.
├── alertmanager
│   ├── config.yml
│   ├── dingtalk_template.tmpl
│   ├── email_template.tmpl
│   ├── wechat_template_02.tmpl
│   └── wechat_template.tmpl
├── dingtalk
│   ├── config-no-template.yml
│   ├── config-use-customed-template.yml
│   ├── config-use-default-template.yml
│   ├── config.yml
│   └── dingtalk_template.tmpl
└── docker-compose.yml
[root@ubuntu2004 alertmanager-and-dingtalk]#cat docker-compose.yml
version: '3.6'

networks:
  monitoring:
    driver: bridge
    ipam:
      config:
        - subnet: 172.31.66.0/24

services:
  alertmanager:
    image: prom/alertmanager:v0.24.0
    volumes:
      - ./alertmanager/:/etc/alertmanager/
    networks:
      - monitoring
    ports:
      - 9093:9093
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
      - '--log.level=debug'

  prometheus-webhook-dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:v2.1.0
    hostname: dingtalk.magedu.com
    volumes:
      - ./dingtalk/:/etc/prometheus-webhook-dingtalk/
    #command:
    #  - --config.file=config.yml
    #  - --config.file=/etc/prometheus-webhook-dingtalk/config-with-template.yml
    networks:
      - monitoring
    ports:
      - 8060:8060
[root@ubuntu2004 alertmanager-and-dingtalk]#cat dingtalk/config.yml
## Request timeout
# timeout: 5s

## Customizable templates path
templates:
  - /etc/prometheus-webhook-dingtalk/dingtalk_template.tmpl

## You can also override the default template using default_message
## The following example uses the 'legacy' template from v0.3.0
default_message:
  title: '{{ template "legacy.title" . }}'
  text: '{{ template "dingtalk.default.message" . }}'

## Targets, previously known as "profiles"
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=70b3270e3b6968780cec96ee588a9545dc9672981b55574d4f82516a5b474a8c
    # secret for signature
    secret: SEC44dfe7e1ffc74a797a871f686e0b3cb211a0438a829dc98f47a770dad7db5646
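On the Alertmanager side, alerts reach the bridge through a webhook receiver; a minimal sketch for alertmanager/config.yml (the receiver name is arbitrary, and the URL path must end in the target name webhook1 defined above):
receivers:
  - name: 'dingtalk'
    webhook_configs:
      - url: 'http://prometheus-webhook-dingtalk:8060/dingtalk/webhook1/send'
        send_resolved: true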
Deploy a VictoriaMetrics cluster for Prometheus remote persistent storage
[root@ubuntu2004 victoriametrics-cluster]#cat README.md
# VictoriaMetrics example cluster
#### Configure Prometheus to use it as remote storage:
remote_write: # remote-write to the remote VM storage
- url: http://vminsert.magedu.com:8480/insert/0/prometheus
remote_read:
- url: http://vmselect.magedu.com:8481/select/0/prometheus
The 0 in the URLs is the tenant ID under VictoriaMetrics' multi-tenancy model.
#### Configure Grafana to use it as a data source:
Add a new data source and set its query URL to http://vmselect.magedu.com:8481/select/0/prometheus.
Once configured, import a dashboard to view data from this source in Grafana.
[root@ubuntu2004 victoriametrics-cluster]#cat docker-compose.yml
version: '3.6'
networks:
vm_net:
driver: bridge
volumes:
strgdata-1: {}
strgdata-2: {}
grafanadata: {}
services:
vmstorage-1:
container_name: vmstorage-1
image: victoriametrics/vmstorage:v1.83.1-cluster
ports:
- 8482
- 8400
- 8401
volumes:
- strgdata-1:/storage
networks:
- vm_net
command:
- '--storageDataPath=/storage'
restart: always
vmstorage-2:
container_name: vmstorage-2
image: victoriametrics/vmstorage:v1.83.1-cluster
networks:
- vm_net
ports:
- 8482
- 8400
- 8401
volumes:
- strgdata-2:/storage
command:
- '--storageDataPath=/storage'
restart: always
vminsert:
container_name: vminsert
image: victoriametrics/vminsert:v1.83.1-cluster
depends_on:
- "vmstorage-1"
- "vmstorage-2"
command:
- '--storageNode=vmstorage-1:8400'
- '--storageNode=vmstorage-2:8400'
ports:
- 8480:8480
networks:
- vm_net
restart: always
vmselect:
container_name: vmselect
image: victoriametrics/vmselect:v1.83.1-cluster
depends_on:
- "vmstorage-1"
- "vmstorage-2"
command:
- '--storageNode=vmstorage-1:8401'
- '--storageNode=vmstorage-2:8401'
#- '--vmalert.proxyURL=http://vmalert:8880'
networks:
- vm_net
ports:
- 8481:8481
restart: always
[root@ubuntu2004 victoriametrics-cluster]#docker-compose up -d
Creating network "victoriametrics-cluster_vm_net" with driver "bridge"
Creating volume "victoriametrics-cluster_strgdata-1" with default driver
Creating volume "victoriametrics-cluster_strgdata-2" with default driver
Creating volume "victoriametrics-cluster_grafanadata" with default driver
Pulling vmstorage-1 (victoriametrics/vmstorage:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vmstorage
213ec9aee27d: Pull complete
6a19377ddd0d: Pull complete
ea5876b4cb5f: Pull complete
Digest: sha256:9f24059dc431c210f6279b6c997ed4fe6cceb4537a446c36b9af1b8506dd2bf0
Status: Downloaded newer image for victoriametrics/vmstorage:v1.83.1-cluster
Pulling vminsert (victoriametrics/vminsert:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vminsert
213ec9aee27d: Already exists
6a19377ddd0d: Already exists
1da207c2dd42: Pull complete
Digest: sha256:9bb864163d7dcfbf781d6f82d19e322b5a660c695ecbd146d85592c57e60d9bf
Status: Downloaded newer image for victoriametrics/vminsert:v1.83.1-cluster
Pulling vmselect (victoriametrics/vmselect:v1.83.1-cluster)...
v1.83.1-cluster: Pulling from victoriametrics/vmselect
213ec9aee27d: Already exists
6a19377ddd0d: Already exists
3f737ea0184e: Pull complete
Digest: sha256:4fad7d731ee727fce4779523b0f1feb66ed55fe414744e4ce856a93f11175870
Status: Downloaded newer image for victoriametrics/vmselect:v1.83.1-cluster
Creating vmstorage-2 ... done
Creating vmstorage-1 ... done
Creating vminsert ... done
Creating vmselect ... done
[root@ubuntu2004 victoriametrics-cluster]#ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port Process
LISTEN 0 4096 0.0.0.0:80 0.0.0.0:* users:(("docker-proxy",pid=52611,fd=4))
LISTEN 0 4096 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=734,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=768,fd=3))
LISTEN 0 4096 0.0.0.0:9113 0.0.0.0:* users:(("docker-proxy",pid=52744,fd=4))
LISTEN 0 4096 0.0.0.0:9115 0.0.0.0:* users:(("docker-proxy",pid=55562,fd=4))
LISTEN 0 4096 0.0.0.0:8480 0.0.0.0:* users:(("docker-proxy",pid=75573,fd=4))
LISTEN 0 4096 0.0.0.0:8481 0.0.0.0:* users:(("docker-proxy",pid=75612,fd=4))
LISTEN 0 4096 0.0.0.0:49153 0.0.0.0:* users:(("docker-proxy",pid=75250,fd=4))
LISTEN 0 4096 0.0.0.0:49154 0.0.0.0:* users:(("docker-proxy",pid=75271,fd=4))
LISTEN 0 4096 0.0.0.0:49155 0.0.0.0:* users:(("docker-proxy",pid=75291,fd=4))
LISTEN 0 4096 0.0.0.0:49156 0.0.0.0:* users:(("docker-proxy",pid=75313,fd=4))
LISTEN 0 4096 0.0.0.0:49157 0.0.0.0:* users:(("docker-proxy",pid=75333,fd=4))
LISTEN 0 4096 0.0.0.0:49158 0.0.0.0:* users:(("docker-proxy",pid=75354,fd=4))
LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=43836,fd=7))
LISTEN 0 4096 [::]:80 [::]:* users:(("docker-proxy",pid=52618,fd=4))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=768,fd=4))
LISTEN 0 4096 [::]:9113 [::]:* users:(("docker-proxy",pid=52751,fd=4))
LISTEN 0 4096 [::]:9115 [::]:* users:(("docker-proxy",pid=55569,fd=4))
LISTEN 0 4096 [::]:8480 [::]:* users:(("docker-proxy",pid=75579,fd=4))
LISTEN 0 4096 [::]:8481 [::]:* users:(("docker-proxy",pid=75622,fd=4))
LISTEN 0 4096 [::]:49153 [::]:* users:(("docker-proxy",pid=75257,fd=4))
LISTEN 0 4096 [::]:49154 [::]:* users:(("docker-proxy",pid=75279,fd=4))
LISTEN 0 4096 [::]:49155 [::]:* users:(("docker-proxy",pid=75298,fd=4))
LISTEN 0 4096 [::]:49156 [::]:* users:(("docker-proxy",pid=75319,fd=4))
LISTEN 0 4096 [::]:49157 [::]:* users:(("docker-proxy",pid=75341,fd=4))
LISTEN 0 4096 [::]:49158 [::]:* users:(("docker-proxy",pid=75367,fd=4))
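With ports 8480 and 8481 listening, both frontends can be spot-checked over HTTP, since each exposes self-metrics at /metrics. Illustrative commands, using the host IP 10.0.0.208 from this lab:

curl -s http://10.0.0.208:8480/metrics | head -n 3   # vminsert
curl -s http://10.0.0.208:8481/metrics | head -n 3   # vmselect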
[root@ubuntu2004 local]#vim prometheus/prometheus.yml
...
remote_write:
- url: http://10.0.0.208:8480/insert/0/prometheus
remote_read:
- url: http://10.0.0.208:8481/select/0/prometheus
"prometheus/prometheus.yml" 107L, 2781C 已写入
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
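After the reload it is worth confirming that samples are flowing into VictoriaMetrics. A sketch of a direct query against vmselect's Prometheus-compatible API, using the same /select/0/prometheus prefix as the remote_read URL above:

curl -s 'http://10.0.0.208:8481/select/0/prometheus/api/v1/query?query=up'

If remote write is working, the JSON result contains the same up series that Prometheus itself scrapes.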
Configure Grafana with the new data source
Monitoring vmselect and vminsert
[root@ubuntu2004 config]#cat vm*
{
"id": "vminsert",
"name": "vminsert",
"address": "10.0.0.208",
"port": 8480,
"tags": ["vminsert"],
"checks": [{
"http": "http://10.0.0.208:8480/metrics",
"interval": "5s"
}]
}
{
"id": "vmselect",
"name": "vmselect",
"address": "10.0.0.208",
"port": 8481,
"tags": ["vmselect"],
"checks": [{
"http": "http://10.0.0.208:8481/metrics",
"interval": "5s"
}]
}
[root@ubuntu2004 config]#curl -XPUT --data @vminsert.json http://localhost:8500/v1/agent/service/register
[root@ubuntu2004 config]#curl -XPUT --data @vmselect.json http://localhost:8500/v1/agent/service/register
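The registrations can be confirmed through Consul's agent API before touching Prometheus; for example:

curl -s http://localhost:8500/v1/agent/services

Both vminsert and vmselect should appear in the returned JSON with their tags, which the consul_sd_configs jobs below match on.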
[root@ubuntu2004 local]#curl -XPOST http://localhost:9090/-/reload
[root@ubuntu2004 local]#cat prometheus/prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
- 10.0.0.210:9093
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
- rules/record-rules-*.yml
- rules/alert-rules-*.yml
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label job=<job_name>
# to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
- job_name: "node_exporter"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "nodes"
refresh_interval: 1m
metric_relabel_configs:
- source_labels:
- __name__
regex: "go_info.*"
action: drop
- job_name: "consul_exporter"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "consul_exporter"
refresh_interval: 1m
- job_name: "mysqld_exporter"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "mysqld_exporter"
refresh_interval: 1m
- job_name: "nginx_exporter"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "nginx_exporter"
refresh_interval: 1m
- job_name: 'blackbox'
metrics_path: /probe
params:
module: [http_2xx] # Look for an HTTP 200 response.
static_configs:
- targets:
- "www.baidu.com"
- "www.google.com"
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: "10.0.0.208:9115" # 指向实际的Blackbox exporter.
- target_label: region
replacement: "remote"
- job_name: "grafana"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "grafana"
refresh_interval: 1m
- job_name: "vminsert"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "vminsert"
refresh_interval: 1m
- job_name: "vmselect"
metrics_path: '/metrics'
scheme: 'http'
consul_sd_configs:
- server: "localhost:8500"
tags:
- "vmselect"
refresh_interval: 1m
remote_write:
- url: http://10.0.0.208:8480/insert/0/prometheus
remote_read:
- url: http://10.0.0.208:8481/select/0/prometheus
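The full configuration can be validated at any time with promtool, which ships in the Prometheus tarball; reloads can be gated on its exit status:

/usr/local/prometheus/promtool check config /usr/local/prometheus/prometheus.yml

promtool exits non-zero on syntax or semantic errors, so a failed check should block the curl-based reload.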
Running Prometheus with docker-compose
[root@ubuntu2004 01-prometheus-basics-example]#docker-compose up -d
Starting 01-prometheus-basics-example_node-exporter_1 ... done
Starting 01-prometheus-basics-example_prometheus_1 ... done
[root@ubuntu2004 01-prometheus-basics-example]#docker-compose ps
Name Command State Ports
--------------------------------------------------------------------------------------------------------------------------------
01-prometheus-basics-example_node-exporter_1 /bin/node_exporter --path. ... Up 0.0.0.0:9100->9100/tcp,:::9100->9100/tcp
01-prometheus-basics-example_prometheus_1 /bin/prometheus --config.f ... Up 0.0.0.0:9090->9090/tcp,:::9090->9090/tcp
[root@ubuntu2004 01-prometheus-basics-example]#tree .
.
├── docker-compose.yml
├── prometheus
│ ├── prometheus.yml
│ └── targets
│ ├── nodes-linux.yaml
│ └── prometheus-servers.yaml
└── README.md
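The targets/ directory indicates this example discovers scrape targets via file_sd_configs. A sketch of the likely wiring, using the file names from the tree above (the job name and labels here are assumptions):

# prometheus/prometheus.yml (excerpt)
scrape_configs:
  - job_name: "nodes"
    file_sd_configs:
      - files:
          - targets/nodes-linux.yaml
        refresh_interval: 2m

# prometheus/targets/nodes-linux.yaml
- targets:
    - "server01.magedu.com:9100"
  labels:
    os: "linux"

Prometheus re-reads the target files when they change, so hosts can be added without reloading the server.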
[root@ubuntu2004 01-prometheus-basics-example]#vim docker-compose.yml
version: '3.6'
volumes:
prometheus_data: {}
networks:
monitoring:
driver: bridge
services:
prometheus:
image: prom/prometheus:v2.40.2
volumes:
- ./prometheus/:/etc/prometheus/
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--web.enable-lifecycle'
networks:
- monitoring
extra_hosts:
- "server01.magedu.com:${SERVER01_HOST_IP}"
- "server02.magedu.com:${SERVER02_HOST_IP}"
- "server03.magedu.com:${SERVER03_HOST_IP}"
ports:
- 9090:9090
restart: always
node-exporter:
image: prom/node-exporter:v1.4.0
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
- '--path.rootfs=/rootfs'
ports:
- 9100:9100
networks:
- monitoring
restart: always
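The ${SERVER01_HOST_IP}-style references in extra_hosts are expanded by docker-compose from the shell environment or from a .env file next to docker-compose.yml. A hypothetical .env for this lab (the IPs are placeholders, not values from the source):

SERVER01_HOST_IP=10.0.0.201
SERVER02_HOST_IP=10.0.0.202
SERVER03_HOST_IP=10.0.0.203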
[root@ubuntu2004 02-prometheus-sd-consul-example]#docker-compose up -d
Creating network "02-prometheus-sd-consul-example_monitoring" with driver "bridge"
Creating volume "02-prometheus-sd-consul-example_prometheus_data" with default driver
Creating volume "02-prometheus-sd-consul-example_grafana_data" with default driver
Pulling consul (consul:1.14)...
1.14: Pulling from library/consul
9621f1afde84: Pull complete
2c3a98fc12ee: Pull complete
ec9c6a4f2410: Pull complete
b15a7bbb699e: Pull complete
c1ba7dc4df33: Pull complete
a0da3713d685: Pull complete
Digest: sha256:192f202e8120d80e864b6e42af1627297dd8b88f42cf148e02a5c6185d717190
Status: Downloaded newer image for consul:1.14
Creating 02-prometheus-sd-consul-example_consul_1 ... done
Creating 02-prometheus-sd-consul-example_node-exporter_1 ... done
Creating 02-prometheus-sd-consul-example_prometheus_1 ... done
[root@ubuntu2004 02-prometheus-sd-consul-example]#docker-compose ps
Name Command State Ports
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
02-prometheus-sd-consul-example_consul_1 docker-entrypoint.sh consu ... Up 8300/tcp, 8301/tcp, 8301/udp, 8302/tcp, 8302/udp, 0.0.0.0:8500->8500/tcp,:::8500->8500/tcp, 8600/tcp, 8600/udp
02-prometheus-sd-consul-example_node-exporter_1 /bin/node_exporter --path. ... Up 0.0.0.0:9100->9100/tcp,:::9100->9100/tcp
02-prometheus-sd-consul-example_prometheus_1 /bin/prometheus --config.f ... Up 0.0.0.0:9090->9090/tcp,:::9090->9090/tcp
[root@ubuntu2004 02-prometheus-sd-consul-example]#cat docker-compose.yml
# Author: MageEdu <mage@magedu.com>
#
version: '3.6'
volumes:
prometheus_data: {}
grafana_data: {}
networks:
monitoring:
driver: bridge
services:
prometheus:
image: prom/prometheus:v2.40.2
volumes:
- ./prometheus/:/etc/prometheus/
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--web.enable-lifecycle'
networks:
- monitoring
ports:
- 9090:9090
extra_hosts:
- "server01.magedu.com:${SERVER01_HOST_IP}"
- "server02.magedu.com:${SERVER02_HOST_IP}"
- "server03.magedu.com:${SERVER03_HOST_IP}"
depends_on:
- consul
restart: always
consul:
image: consul:1.14
volumes:
- ./consul_configs:/consul/config
networks:
- monitoring
ports:
- 8500:8500
command: ["consul","agent","-dev","-bootstrap","-config-dir","/consul/config","-data-dir","/consul/data","-ui","-log-level","INFO","-bind","127.0.0.1","-client","0.0.0.0"]
node-exporter:
image: prom/node-exporter:v1.4.0
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
- '--path.rootfs=/rootfs'
ports:
- 9100:9100
networks:
- monitoring
restart: always
[root@ubuntu2004 02-prometheus-sd-consul-example]#tree .
.
├── consul_configs
│ ├── nodes.json
│ └── prometheus-servers.json
├── docker-compose.yml
├── prometheus
│ └── prometheus.yml
└── README.MD
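The consul_configs/ files are service definitions that the consul agent loads at startup via -config-dir. A sketch of what nodes.json could contain, mirroring the registration format used for vminsert/vmselect earlier (the address, port, and tag are assumptions based on the node-exporter service in this compose file):

{
  "service": {
    "id": "node_exporter",
    "name": "node_exporter",
    "address": "node-exporter",
    "port": 9100,
    "tags": ["nodes"],
    "checks": [{
      "http": "http://node-exporter:9100/metrics",
      "interval": "5s"
    }]
  }
}

The "nodes" tag matches the node_exporter job's consul_sd_configs filter shown earlier, so the target is picked up automatically once Consul loads the file.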