[root@easzlab-deploy-01 data]#mkdir prometheus
[root@easzlab-deploy-01 data]#cd prometheus/
[root@easzlab-deploy-01 prometheus]#wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.11.0.tar.gz
[root@easzlab-deploy-01 prometheus]#tar xf v0.11.0.tar.gz
[root@easzlab-deploy-01 prometheus]#cd kube-prometheus-0.11.0/
对grafana、prometheus、alertmanager做持久化存储
#创建grafana存储pvc
[root@easzlab-deploy-01 prometheus]#vim grafana-storage-pvc.yaml
apiVersion: v1
kind: Namespace
metadata:
labels:
kubernetes.io/metadata.name: monitoring
name: monitoring
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana-storage
namespace: monitoring
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
storageClassName: hdd-rook-ceph-block
[root@easzlab-deploy-01 prometheus]#kubectl apply -f grafana-storage-pvc.yaml
namespace/monitoring created
persistentvolumeclaim/grafana-storage created
#在grafana-deployment.yaml的volumes中挂载上面创建的PVC(grafana-storage)
[root@easzlab-deploy-01 manifests]#pwd
/data/prometheus/kube-prometheus-0.11.0/manifests
[root@easzlab-deploy-01 manifests]#vim +149 grafana-deployment.yaml
volumes:
- name: grafana-storage
persistentVolumeClaim:
claimName: grafana-storage
#添加在最后对齐上一行
[root@easzlab-deploy-01 manifests]#vim prometheus-prometheus.yaml
storage:
volumeClaimTemplate:
spec:
storageClassName: hdd-rook-ceph-block
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 100Gi
# retention: 1y
#添加在最后对齐上一行
[root@easzlab-deploy-01 manifests]#vim alertmanager-alertmanager.yaml
storage:
volumeClaimTemplate:
spec:
storageClassName: hdd-rook-ceph-block
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 100Gi
安装kube-prometheus
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl create -f manifests/setup/
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl create -f manifests/
#手动加载缺失的image
[root@easzlab-deploy-01 ansible]#vim prometheus_images.yaml
---
- name: install prometheus images
hosts: master,node,ceph
tasks:
- name: create dir1
file:
path: /data/prometheus
state: directory
- name: copy
copy:
src: "{{ item }}"
dest: /data/prometheus
loop:
- kube-state-metrics.tar
- prometheus-adapter.tar
- name: shell
shell: "nerdctl -n k8s.io load -i /data/prometheus/{{ item }}"
loop:
- kube-state-metrics.tar
- prometheus-adapter.tar
[root@easzlab-deploy-01 ansible]#ansible-playbook prometheus_images.yaml
PLAY [install prometheus images] ************************************************************************************************************************************************************
TASK [Gathering Facts] **********************************************************************************************************************************************************************
ok: [easzlab-k8s-master-01]
ok: [easzlab-k8s-master-02]
ok: [easzlab-k8s-master-03]
ok: [easzlab-k8s-node-02]
ok: [easzlab-k8s-node-01]
ok: [easzlab-k8s-node-03]
ok: [easzlab-k8s-ceph-01]
ok: [easzlab-k8s-ceph-02]
ok: [easzlab-k8s-ceph-03]
TASK [create dir1] **************************************************************************************************************************************************************************
changed: [easzlab-k8s-master-03]
changed: [easzlab-k8s-node-01]
changed: [easzlab-k8s-node-02]
changed: [easzlab-k8s-master-01]
changed: [easzlab-k8s-master-02]
changed: [easzlab-k8s-ceph-02]
changed: [easzlab-k8s-node-03]
changed: [easzlab-k8s-ceph-01]
changed: [easzlab-k8s-ceph-03]
TASK [copy] *********************************************************************************************************************************************************************************
changed: [easzlab-k8s-master-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-03] => (item=prometheus-adapter.tar)
TASK [shell] ********************************************************************************************************************************************************************************
changed: [easzlab-k8s-node-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-02] => (item=prometheus-adapter.tar)
PLAY RECAP **********************************************************************************************************************************************************************************
easzlab-k8s-ceph-01 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-ceph-02 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-ceph-03 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-master-01 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-master-02 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-master-03 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-node-01 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-node-02 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
easzlab-k8s-node-03 : ok=4 changed=3 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
[root@easzlab-deploy-01 ansible]#kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-main-0 2/2 Running 0 33m
alertmanager-main-1 2/2 Running 0 33m
alertmanager-main-2 2/2 Running 0 33m
blackbox-exporter-569d5d4bb7-tnjz4 3/3 Running 0 34m
grafana-58b8dcfd8b-qdqhv 1/1 Running 0 34m
kube-state-metrics-5bd9d9bf68-4ftf8 3/3 Running 0 34m
node-exporter-62ss7 2/2 Running 0 34m
node-exporter-9dzsd 2/2 Running 0 34m
node-exporter-9qhl4 2/2 Running 0 34m
node-exporter-k42x6 2/2 Running 0 34m
node-exporter-kbcbs 2/2 Running 0 34m
node-exporter-kth2q 2/2 Running 0 34m
node-exporter-m2bkw 2/2 Running 0 34m
node-exporter-pz5lm 2/2 Running 0 34m
node-exporter-vftfk 2/2 Running 0 34m
prometheus-adapter-867598997b-9rj7q 1/1 Running 0 34m
prometheus-adapter-867598997b-tqprp 1/1 Running 0 34m
prometheus-k8s-0 2/2 Running 0 33m
prometheus-k8s-1 2/2 Running 0 33m
prometheus-operator-7b64d465b9-t4jtq 2/2 Running 0 34m
取消grafana的networkPolicy限制,否则无法在本地通过NodePort访问grafana dashboard界面;如有必要,也可以同样取消prometheus、alertmanager的networkPolicy限制
[root@easzlab-deploy-01 ansible]#cd /data/prometheus/kube-prometheus-0.11.0/
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl delete -f manifests/grafana-networkPolicy.yaml
networkpolicy.networking.k8s.io "grafana" deleted
设置grafana svc NodePort
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl edit svc -n monitoring grafana
[root@easzlab-deploy-01 files]#kubectl edit svc -n monitoring grafana
# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Service
metadata:
creationTimestamp: "2022-09-29T11:41:07Z"
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 8.5.5
name: grafana
namespace: monitoring
resourceVersion: "191930"
uid: 69f581b5-5f67-4045-9a0e-fbf0d81574c4
spec:
clusterIP: 10.100.88.29
clusterIPs:
- 10.100.88.29
externalTrafficPolicy: Cluster
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
nodePort: 60672
port: 3000
protocol: TCP
targetPort: http
selector:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: None
type: NodePort #修改此行
status:
loadBalancer: {}
访问grafana dashboard页面
导入监控模板
监控ceph
[root@easzlab-deploy-01 monitoring]#pwd
/data/ceph/rook-1.10.2/deploy/examples/monitoring
[root@easzlab-deploy-01 monitoring]#kubectl create -f prometheus.yaml
serviceaccount/prometheus created
clusterrole.rbac.authorization.k8s.io/prometheus created
clusterrole.rbac.authorization.k8s.io/prometheus-rules created
clusterrolebinding.rbac.authorization.k8s.io/prometheus created
prometheus.monitoring.coreos.com/rook-prometheus created
[root@easzlab-deploy-01 monitoring]#kubectl create -f prometheus-service.yaml
service/rook-prometheus created
[root@easzlab-deploy-01 monitoring]#kubectl create -f rbac.yaml
role.rbac.authorization.k8s.io/rook-ceph-monitor created
rolebinding.rbac.authorization.k8s.io/rook-ceph-monitor created
role.rbac.authorization.k8s.io/rook-ceph-metrics created
rolebinding.rbac.authorization.k8s.io/rook-ceph-metrics created
role.rbac.authorization.k8s.io/rook-ceph-monitor-mgr created
rolebinding.rbac.authorization.k8s.io/rook-ceph-monitor-mgr created
[root@easzlab-deploy-01 monitoring]#kubectl create -f service-monitor.yaml
servicemonitor.monitoring.coreos.com/rook-ceph-mgr created
#未启用csi grpc metrics时,注释掉csi-metrics-service-monitor.yaml中csi-grpc-metrics相关的配置
[root@easzlab-deploy-01 monitoring]#vim csi-metrics-service-monitor.yaml
# comment csi-grpc-metrics related information if csi grpc metrics is not enabled
# - port: csi-grpc-metrics
# path: /metrics
# interval: 5s
[root@easzlab-deploy-01 monitoring]#kubectl create -f csi-metrics-service-monitor.yaml
servicemonitor.monitoring.coreos.com/csi-metrics created
[root@easzlab-deploy-01 monitoring]#kubectl create -f localrules.yaml
prometheusrule.monitoring.coreos.com/prometheus-ceph-rules created
[root@easzlab-deploy-01 monitoring]#kubectl create -f keda-rgw.yaml
error: resource mapping not found for name: "rgw-scale" namespace: "rook-ceph" from "keda-rgw.yaml": no matches for kind "ScaledObject" in version "keda.sh/v1alpha1"
ensure CRDs are installed first
[root@easzlab-deploy-01 monitoring]#kubectl create -f externalrules.yaml
Error from server (AlreadyExists): error when creating "externalrules.yaml": prometheusrules.monitoring.coreos.com "prometheus-ceph-rules" already exists