环境提要:

有两个Prometheus Server:

A:在Linux主机上自建的Prometheus,作为中央仓库。启动时需要加上 --web.enable-remote-write-receiver 参数(例如:./prometheus --web.enable-remote-write-receiver),该命令行选项用于开启 Prometheus 服务器接收其他 Prometheus 实例通过 remote write 推送数据的功能

B:部署在LKE集群上的Prometheus,需要将它采集到的数据推送到A

为了表述简洁,会略去很多概念性的说明

配置过程

两种方式创建Prometheus

一、在Linux上创建A

  1. 更新yum

    # Bring all installed packages up to date (-y answers yes to every prompt).
    sudo yum update -y
  2. 下载预编译的二进制包、解压并重命名

    # Pick a suitable release from the Prometheus download page.
    # The --web.enable-remote-write-receiver flag used by server A needs
    # Prometheus v2.33.0 or newer, so v2.22.0 cannot be used for this setup.
    curl -LO https://github.com/prometheus/prometheus/releases/download/v2.45.0/prometheus-2.45.0.linux-amd64.tar.gz
    tar -xvf prometheus-2.45.0.linux-amd64.tar.gz
    mv prometheus-2.45.0.linux-amd64 prometheus-files
  3. 创建prometheus用户并授予相应的权限

    # Dedicated account for the service: no home directory, no login shell.
    sudo useradd --no-create-home --shell /bin/false prometheus
    # Config and data directories, both owned by the prometheus user.
    sudo mkdir /etc/prometheus /var/lib/prometheus
    sudo chown prometheus:prometheus /etc/prometheus /var/lib/prometheus
  4. 复制相关文件到/usr/local/bin,给prometheus授权

    # Install the server and promtool binaries, then hand them to the prometheus user.
    sudo cp prometheus-files/prometheus prometheus-files/promtool /usr/local/bin/
    sudo chown prometheus:prometheus /usr/local/bin/prometheus /usr/local/bin/promtool
  5. 将consoles和console_libraries从prometheus-files复制到/etc/prometheus,并将所有权更改为prometheus用户

    # Copy the console templates and libraries into the config directory.
    sudo cp -r prometheus-files/consoles /etc/prometheus
    sudo cp -r prometheus-files/console_libraries /etc/prometheus
    sudo chown -R prometheus:prometheus /etc/prometheus/consoles
    # The path must be a single argument: a space before "/console_libraries"
    # would make chown act on /etc/prometheus and a non-existent /console_libraries.
    sudo chown -R prometheus:prometheus /etc/prometheus/console_libraries
  6. 创建prometheus.yml文件并写入以下内容,然后授权

    sudo vi /etc/prometheus/prometheus.yml

    # Contents of /etc/prometheus/prometheus.yml
    global:
      # Default scrape interval for every job.
      scrape_interval: 10s

    scrape_configs:
      # Prometheus scrapes its own metrics endpoint.
      - job_name: 'prometheus'
        # Per-job override of the global interval.
        scrape_interval: 5s
        static_configs:
          - targets: ['localhost:9090']

    # Back in the shell: hand the config file to the prometheus user.
    sudo chown prometheus:prometheus /etc/prometheus/prometheus.yml
  7. 创建prometheus systemd服务

    sudo vi /etc/systemd/system/prometheus.service

    [Unit]
    Description=Prometheus
    Wants=network-online.target
    After=network-online.target

    [Service]
    User=prometheus
    Group=prometheus
    Type=simple
    # Every continued line of ExecStart must end with "\"; without it the
    # following flags are not part of the command. Notes stay on their own
    # lines because trailing text on a command line would be passed as arguments.
    # --web.enable-remote-write-receiver turns on receiving of remote-write data.
    ExecStart=/usr/local/bin/prometheus \
     --config.file /etc/prometheus/prometheus.yml \
     --storage.tsdb.path /var/lib/prometheus/ \
     --web.console.templates=/etc/prometheus/consoles \
     --web.console.libraries=/etc/prometheus/console_libraries \
     --web.enable-remote-write-receiver

    [Install]
    WantedBy=multi-user.target


    sudo systemctl daemon-reload       # make systemd re-read the unit files
    sudo systemctl enable prometheus   # start the service at boot
    sudo systemctl start prometheus    # start the service now
    sudo systemctl status prometheus   # check the service status
  8. 查看Prometheus Web UI

    之前步骤确认无误,就可以打开下面的链接查看web界面
    
    http://<prometheus-ip>:9090/graph

二、在k8s上安装B

  1. 创建命名空间

    # Namespace used by the monitoring manifests in this guide.
    kubectl create namespace monitoring
  2. 创建RBAC文件clusterRole.yaml 并使之生效
# ClusterRole: read-only access to the objects Prometheus discovers and scrapes.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
# Ingress is served from networking.k8s.io; the extensions group was removed
# in Kubernetes 1.22. Both are listed so older clusters keep working.
- apiGroups:
  - extensions
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Grant the role to the "default" ServiceAccount of the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: default
  namespace: monitoring


kubectl create -f clusterRole.yaml
  3. 创建一个ConfigMap用来存放Prometheus配置,使其生效

    # ConfigMap holding prometheus.yml for server B.
    # Scrape jobs defined: node-exporter, kubernetes-apiservers, kubernetes-nodes,
    # kubernetes-pods, kube-state-metrics, kubernetes-cadvisor and
    # kubernetes-service-endpoints.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: prometheus-server-conf
      labels:
        name: prometheus-server-conf
      namespace: monitoring
    data:
      prometheus.yml: |-
        global:
          scrape_interval: 5s
          evaluation_interval: 5s
        rule_files:
          - /etc/prometheus/prometheus.rules
        scrape_configs:
          - job_name: 'node-exporter'
            kubernetes_sd_configs:
              - role: endpoints
            relabel_configs:
            - source_labels: [__meta_kubernetes_endpoints_name]
              regex: 'node-exporter'
              action: keep
          - job_name: 'kubernetes-apiservers'
            kubernetes_sd_configs:
            - role: endpoints
            scheme: https
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            relabel_configs:
            - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
              action: keep
              regex: default;kubernetes;https
          - job_name: 'kubernetes-nodes'
            scheme: https
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            kubernetes_sd_configs:
            - role: node
            relabel_configs:
            - action: labelmap
              regex: __meta_kubernetes_node_label_(.+)
            - target_label: __address__
              replacement: kubernetes.default.svc:443
            - source_labels: [__meta_kubernetes_node_name]
              regex: (.+)
              target_label: __metrics_path__
              replacement: /api/v1/nodes/${1}/proxy/metrics
          - job_name: 'kubernetes-pods'
            kubernetes_sd_configs:
            - role: pod
            relabel_configs:
            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
              action: keep
              regex: true
            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
              action: replace
              target_label: __metrics_path__
              regex: (.+)
            - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
              action: replace
              regex: ([^:]+)(?::\d+)?;(\d+)
              replacement: $1:$2
              target_label: __address__
            - action: labelmap
              regex: __meta_kubernetes_pod_label_(.+)
            - source_labels: [__meta_kubernetes_namespace]
              action: replace
              target_label: kubernetes_namespace
            - source_labels: [__meta_kubernetes_pod_name]
              action: replace
              target_label: kubernetes_pod_name
          - job_name: 'kube-state-metrics'
            static_configs:
              - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080']
          - job_name: 'kubernetes-cadvisor'
            scheme: https
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            kubernetes_sd_configs:
            - role: node
            relabel_configs:
            - action: labelmap
              regex: __meta_kubernetes_node_label_(.+)
            - target_label: __address__
              replacement: kubernetes.default.svc:443
            - source_labels: [__meta_kubernetes_node_name]
              regex: (.+)
              target_label: __metrics_path__
              replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
          - job_name: 'kubernetes-service-endpoints'
            kubernetes_sd_configs:
            - role: endpoints
            relabel_configs:
            - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
              action: keep
              regex: true
            - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
              action: replace
              target_label: __scheme__
              regex: (https?)
            - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
              action: replace
              target_label: __metrics_path__
              regex: (.+)
            - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
              action: replace
              target_label: __address__
              regex: ([^:]+)(?::\d+)?;(\d+)
              replacement: $1:$2
            - action: labelmap
              regex: __meta_kubernetes_service_label_(.+)
            - source_labels: [__meta_kubernetes_namespace]
              action: replace
              target_label: kubernetes_namespace
            - source_labels: [__meta_kubernetes_service_name]
              action: replace
              target_label: kubernetes_name


    kubectl create -f config-map.yaml
  4. 创建一个Prometheus Deployment,使其生效

    # Single-replica Prometheus server (B) in the monitoring namespace.
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: prometheus-deployment
      namespace: monitoring
      labels:
        app: prometheus-server
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: prometheus-server
      template:
        metadata:
          labels:
            app: prometheus-server
        spec:
          containers:
            - name: prometheus
              image: prom/prometheus
              args:
                - "--storage.tsdb.retention.time=12h"
                - "--config.file=/etc/prometheus/prometheus.yml"
                - "--storage.tsdb.path=/prometheus/"
              ports:
                - containerPort: 9090
              resources:
                requests:
                  cpu: 500m
                  memory: 500M
                limits:
                  cpu: 1
                  memory: 1Gi
              volumeMounts:
                # prometheus.yml comes from the prometheus-server-conf ConfigMap.
                - name: prometheus-config-volume
                  mountPath: /etc/prometheus/
                - name: prometheus-storage-volume
                  mountPath: /prometheus/
          volumes:
            - name: prometheus-config-volume
              configMap:
                # 420 decimal is file mode 0644.
                defaultMode: 420
                name: prometheus-server-conf
            # emptyDir storage: the TSDB data does not survive the pod.
            - name: prometheus-storage-volume
              emptyDir: {}

    kubectl create -f prometheus-deployment.yaml   # create the Deployment
    kubectl get all -n monitoring                  # check the status

三、配置发送 B --> A

  1. 在B的配置文件config-map.yaml里填写以下内容,新增2行(13,14)

      8 data:
      9   prometheus.yml: |-
     10     global:
     11       scrape_interval: 5s
     12       evaluation_interval: 5s
     13     remote_write:
     14       - url: "http://A的IP:9090/api/v1/write"

     kubectl apply -f config-map.yaml
     # The Deployment runs no config reloader, so restart the pod to make
     # Prometheus load the updated prometheus.yml.
     kubectl rollout restart deployment prometheus-deployment -n monitoring
  2. 检查推送的数据是否生效

查看prometheus server B的pod日志同时在A的web UI上搜索pod相关的metric指标以验证数据是否推送过来

四、安装Node Exporter(可选)

  1. 创建daemonset.yaml并写入以下内容,使其生效

    # Runs one node-exporter pod per node in the monitoring namespace.
    apiVersion: apps/v1
    kind: DaemonSet
    metadata:
      labels:
        app.kubernetes.io/component: exporter
        app.kubernetes.io/name: node-exporter
      name: node-exporter
      namespace: monitoring
    spec:
      selector:
        matchLabels:
          app.kubernetes.io/component: exporter
          app.kubernetes.io/name: node-exporter
      template:
        metadata:
          labels:
            app.kubernetes.io/component: exporter
            app.kubernetes.io/name: node-exporter
        spec:
          containers:
          - args:
            # Read host data from the hostPath mounts declared below.
            - --path.sysfs=/host/sys
            - --path.rootfs=/host/root
            - --no-collector.wifi
            - --no-collector.hwmon
            # NOTE(review): newer node_exporter releases call this flag
            # --collector.filesystem.mount-points-exclude; confirm against the
            # image version actually pulled, since the image tag is not pinned.
            - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
            - --collector.netclass.ignored-devices=^(veth.*)$
            name: node-exporter
            image: prom/node-exporter
            ports:
              - containerPort: 9100
                protocol: TCP
            resources:
              limits:
                cpu: 250m
                memory: 180Mi
              requests:
                cpu: 102m
                memory: 180Mi
            volumeMounts:
            - mountPath: /host/sys
              mountPropagation: HostToContainer
              name: sys
              readOnly: true
            - mountPath: /host/root
              mountPropagation: HostToContainer
              name: root
              readOnly: true
          volumes:
          - hostPath:
              path: /sys
            name: sys
          - hostPath:
              path: /
            name: root

    kubectl create -f daemonset.yaml      # create the DaemonSet
    kubectl get daemonset -n monitoring   # check the status
  2. 创建service.yaml并写入以下内容,使其生效

    ---
    # Service in front of the node-exporter pods; the prometheus.io/*
    # annotations are what the kubernetes-service-endpoints job keys on.
    kind: Service
    apiVersion: v1
    metadata:
      name: node-exporter
      namespace: monitoring
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9100'
    spec:
      selector:
        app.kubernetes.io/component: exporter
        app.kubernetes.io/name: node-exporter
      ports:
      - name: node-exporter
        protocol: TCP
        port: 9100
        targetPort: 9100

    kubectl create -f service.yaml        # create the Service
    kubectl get endpoints -n monitoring   # check which pods the endpoints point to
  3. 验证

去A的web UI 搜索node相关指标

以上便是Prometheus集群之间数据交互的一个简单验证示例,所有步骤均经过实测验证,请放心使用。

EngineerLeo
598 声望38 粉丝

专注于云原生、AI等相关技术