安装kube-prometheus
安装kube-prometheus

安装kube-prometheus

kube-prometheus官方的兼容性说明中没有写明是否支持Kubernetes 1.25,而目前搭建的集群版本是1.25
[root@easzlab-deploy-01 data]#mkdir prometheus
[root@easzlab-deploy-01 data]#cd prometheus/
[root@easzlab-deploy-01 prometheus]#wget https://github.com/prometheus-operator/kube-prometheus/archive/refs/tags/v0.11.0.tar.gz

[root@easzlab-deploy-01 prometheus]#tar xf v0.11.0.tar.gz 
[root@easzlab-deploy-01 prometheus]#cd kube-prometheus-0.11.0/

对grafana、prometheus、alertmanager做持久化存储

#创建grafana存储pvc
[root@easzlab-deploy-01 prometheus]#vim grafana-storage-pvc.yaml
apiVersion: v1
kind: Namespace
metadata:
  labels:
    kubernetes.io/metadata.name: monitoring
  name: monitoring

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-storage
  namespace: monitoring
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 100Gi
  storageClassName: hdd-rook-ceph-block

[root@easzlab-deploy-01 prometheus]#kubectl apply -f grafana-storage-pvc.yaml
namespace/monitoring created
persistentvolumeclaim/grafana-storage created
#在grafana-deployment.yaml中,将grafana-storage卷改为使用上面创建的PVC
[root@easzlab-deploy-01 manifests]#pwd
/data/prometheus/kube-prometheus-0.11.0/manifests
[root@easzlab-deploy-01 manifests]#vim +149 grafana-deployment.yaml
volumes:
151       - name: grafana-storage
152         persistentVolumeClaim:
153             claimName: grafana-storage

#在prometheus-prometheus.yaml文件末尾添加以下storage配置,storage的缩进与上一行的字段对齐
[root@easzlab-deploy-01 manifests]#vim prometheus-prometheus.yaml
storage:
        volumeClaimTemplate:
          spec:
            storageClassName: hdd-rook-ceph-block
            accessModes: ["ReadWriteOnce"]
            resources:
              requests:
                storage: 100Gi
     # retention: 1y


#在alertmanager-alertmanager.yaml文件末尾添加以下storage配置,storage的缩进与上一行的字段对齐
[root@easzlab-deploy-01 manifests]#vim alertmanager-alertmanager.yaml
storage:
    volumeClaimTemplate:
      spec:
        storageClassName: hdd-rook-ceph-block
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 100Gi

安装kube-prometheus

[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl create -f manifests/setup/
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl create -f manifests/

#手动加载缺失的image
[root@easzlab-deploy-01 ansible]#vim prometheus_images.yaml 
---
- name: install prometheus images
  hosts: master,node,ceph
  tasks:
    - name: create dir1
      file:
        path: /data/prometheus
        state: directory
    - name: copy
      copy:
        src: "{{ item }}"
        dest: /data/prometheus
      loop:
          - kube-state-metrics.tar
          - prometheus-adapter.tar
    - name: shell
      shell: "nerdctl -n k8s.io load -i /data/prometheus/{{ item }}"
      loop:
        - kube-state-metrics.tar
        - prometheus-adapter.tar

[root@easzlab-deploy-01 ansible]#ansible-playbook  prometheus_images.yaml 

PLAY [install prometheus images] ************************************************************************************************************************************************************

TASK [Gathering Facts] **********************************************************************************************************************************************************************
ok: [easzlab-k8s-master-01]
ok: [easzlab-k8s-master-02]
ok: [easzlab-k8s-master-03]
ok: [easzlab-k8s-node-02]
ok: [easzlab-k8s-node-01]
ok: [easzlab-k8s-node-03]
ok: [easzlab-k8s-ceph-01]
ok: [easzlab-k8s-ceph-02]
ok: [easzlab-k8s-ceph-03]

TASK [create dir1] **************************************************************************************************************************************************************************
changed: [easzlab-k8s-master-03]
changed: [easzlab-k8s-node-01]
changed: [easzlab-k8s-node-02]
changed: [easzlab-k8s-master-01]
changed: [easzlab-k8s-master-02]
changed: [easzlab-k8s-ceph-02]
changed: [easzlab-k8s-node-03]
changed: [easzlab-k8s-ceph-01]
changed: [easzlab-k8s-ceph-03]

TASK [copy] *********************************************************************************************************************************************************************************
changed: [easzlab-k8s-master-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-03] => (item=prometheus-adapter.tar)

TASK [shell] ********************************************************************************************************************************************************************************
changed: [easzlab-k8s-node-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-master-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-master-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-02] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-node-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-03] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-01] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-ceph-02] => (item=kube-state-metrics.tar)
changed: [easzlab-k8s-node-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-01] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-03] => (item=prometheus-adapter.tar)
changed: [easzlab-k8s-ceph-02] => (item=prometheus-adapter.tar)

PLAY RECAP **********************************************************************************************************************************************************************************
easzlab-k8s-ceph-01        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-ceph-02        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-ceph-03        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-master-01      : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-master-02      : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-master-03      : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-node-01        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-node-02        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
easzlab-k8s-node-03        : ok=4    changed=3    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   

[root@easzlab-deploy-01 ansible]#kubectl get pod -n monitoring
NAME                                   READY   STATUS    RESTARTS   AGE
alertmanager-main-0                    2/2     Running   0          33m
alertmanager-main-1                    2/2     Running   0          33m
alertmanager-main-2                    2/2     Running   0          33m
blackbox-exporter-569d5d4bb7-tnjz4     3/3     Running   0          34m
grafana-58b8dcfd8b-qdqhv               1/1     Running   0          34m
kube-state-metrics-5bd9d9bf68-4ftf8    3/3     Running   0          34m
node-exporter-62ss7                    2/2     Running   0          34m
node-exporter-9dzsd                    2/2     Running   0          34m
node-exporter-9qhl4                    2/2     Running   0          34m
node-exporter-k42x6                    2/2     Running   0          34m
node-exporter-kbcbs                    2/2     Running   0          34m
node-exporter-kth2q                    2/2     Running   0          34m
node-exporter-m2bkw                    2/2     Running   0          34m
node-exporter-pz5lm                    2/2     Running   0          34m
node-exporter-vftfk                    2/2     Running   0          34m
prometheus-adapter-867598997b-9rj7q    1/1     Running   0          34m
prometheus-adapter-867598997b-tqprp    1/1     Running   0          34m
prometheus-k8s-0                       2/2     Running   0          33m
prometheus-k8s-1                       2/2     Running   0          33m
prometheus-operator-7b64d465b9-t4jtq   2/2     Running   0          34m

取消grafana的NetworkPolicy限制,否则无法通过NodePort从本地访问grafana dashboard界面;如有必要,也可以同样取消prometheus、alertmanager的NetworkPolicy限制

[root@easzlab-deploy-01 ansible]#cd /data/prometheus/kube-prometheus-0.11.0/
[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl delete -f manifests/grafana-networkPolicy.yaml
networkpolicy.networking.k8s.io "grafana" deleted

设置grafana svc NodePort

[root@easzlab-deploy-01 kube-prometheus-0.11.0]#kubectl edit svc -n monitoring grafana
[root@easzlab-deploy-01 files]#kubectl edit svc -n monitoring grafana

# Please edit the object below. Lines beginning with a '#' will be ignored,
# and an empty file will abort the edit. If an error occurs while saving this file will be
# reopened with the relevant failures.
#
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: "2022-09-29T11:41:07Z"
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 8.5.5
  name: grafana
  namespace: monitoring
  resourceVersion: "191930"
  uid: 69f581b5-5f67-4045-9a0e-fbf0d81574c4
spec:
  clusterIP: 10.100.88.29
  clusterIPs:
  - 10.100.88.29
  externalTrafficPolicy: Cluster
  internalTrafficPolicy: Cluster
  ipFamilies:
  - IPv4
  ipFamilyPolicy: SingleStack
  ports:
  - name: http
    nodePort: 60672
    port: 3000
    protocol: TCP
    targetPort: http
  selector:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
  sessionAffinity: None
  type: NodePort    #修改此行
status:
  loadBalancer: {}

访问grafana dashboard页面

导入监控模板

监控ceph

[root@easzlab-deploy-01 monitoring]#pwd
/data/ceph/rook-1.10.2/deploy/examples/monitoring
[root@easzlab-deploy-01 monitoring]#kubectl create -f prometheus.yaml 
serviceaccount/prometheus created
clusterrole.rbac.authorization.k8s.io/prometheus created
clusterrole.rbac.authorization.k8s.io/prometheus-rules created
clusterrolebinding.rbac.authorization.k8s.io/prometheus created
prometheus.monitoring.coreos.com/rook-prometheus created
[root@easzlab-deploy-01 monitoring]#kubectl create -f prometheus-service.yaml 
service/rook-prometheus created
[root@easzlab-deploy-01 monitoring]#kubectl create -f rbac.yaml 
role.rbac.authorization.k8s.io/rook-ceph-monitor created
rolebinding.rbac.authorization.k8s.io/rook-ceph-monitor created
role.rbac.authorization.k8s.io/rook-ceph-metrics created
rolebinding.rbac.authorization.k8s.io/rook-ceph-metrics created
role.rbac.authorization.k8s.io/rook-ceph-monitor-mgr created
rolebinding.rbac.authorization.k8s.io/rook-ceph-monitor-mgr created
[root@easzlab-deploy-01 monitoring]#kubectl create -f service-monitor.yaml 
servicemonitor.monitoring.coreos.com/rook-ceph-mgr created

#如果未启用csi grpc metrics,需要在csi-metrics-service-monitor.yaml中注释掉以下csi-grpc-metrics相关配置
[root@easzlab-deploy-01 monitoring]#vim csi-metrics-service-monitor.yaml 
    # comment csi-grpc-metrics related information if csi grpc metrics is not enabled
#    - port: csi-grpc-metrics
#      path: /metrics
#      interval: 5s


[root@easzlab-deploy-01 monitoring]#kubectl create -f csi-metrics-service-monitor.yaml 
servicemonitor.monitoring.coreos.com/csi-metrics created
[root@easzlab-deploy-01 monitoring]#kubectl create -f localrules.yaml 
prometheusrule.monitoring.coreos.com/prometheus-ceph-rules created
[root@easzlab-deploy-01 monitoring]#kubectl create -f keda-rgw.yaml 
error: resource mapping not found for name: "rgw-scale" namespace: "rook-ceph" from "keda-rgw.yaml": no matches for kind "ScaledObject" in version "keda.sh/v1alpha1"
ensure CRDs are installed first
[root@easzlab-deploy-01 monitoring]#kubectl create -f  externalrules.yaml 
Error from server (AlreadyExists): error when creating "externalrules.yaml": prometheusrules.monitoring.coreos.com "prometheus-ceph-rules" already exists