prometheus监控之进程监控processexporter

1.下载process-exporter

wget https://github.com/ncabatoff/process-exporter/releases/download/v0.7.10/process-exporter-0.7.10.linux-amd64.tar.gz

2 安装部署process-exporter

# Unpack the release tarball
tar -xf process-exporter-0.7.10.linux-amd64.tar.gz
# Install into the directory that the systemd unit's ExecStart points at
# (/usr/local/process_exporter); a differently spelled directory would leave
# the service unable to find the binary.
sudo mv process-exporter-0.7.10.linux-amd64 /usr/local/process_exporter

注册到系统服务
# Write the systemd unit for process-exporter. Everything between the two EOF
# markers becomes the content of process_exporter.service. ExecStart expects
# both the binary and process-conf.yaml under /usr/local/process_exporter.
cat > /usr/lib/systemd/system/process_exporter.service << EOF

[Unit]

Description=process_exporter

Documentation=https://github.com/ncabatoff/process-exporter

After=network.target

[Service]

Type=simple

ExecStart=/usr/local/process_exporter/process-exporter -config.path=/usr/local/process_exporter/process-conf.yaml

Restart=always

[Install]

WantedBy=multi-user.target

EOF

加载并开机自启

systemctl daemon-reload && systemctl enable process_exporter

启动process exporter

# Start the service now; `enable` alone only registers it for the next boot.
systemctl start process_exporter

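3 配置process-exporter

配置文件需保存到 systemd 服务中 -config.path 指定的路径,即 /usr/local/process_exporter/process-conf.yaml;修改配置后执行 systemctl restart process_exporter 使其生效。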

指定一个进程:

process_names:
  # Use a static group name so the exported label is groupname="redis-server".
  # A name of "{{.Matches}}" renders the whole regex-capture map instead of the
  # bare process name, so a query on groupname="redis-server" would not match.
  - name: 'redis-server'
    cmdline:
      - 'redis-server'

指定多个进程:

# One list entry per monitored program; each cmdline regex selects one of them.
# NOTE(review): "{{.Matches}}" renders the whole capture map, so the resulting
# group names presumably look like map[:test1] rather than test1 - confirm in
# the exporter's /metrics output before relying on them in queries.
process_names:
  - name: "{{.Matches}}"
    cmdline:
      - 'test1'
  - name: "{{.Matches}}"
    cmdline:
      - 'test2'
  - name: "{{.Matches}}"
    cmdline:
      - 'test3'

指定所有进程:

process_names:
 # Catch-all entry: the regex '.+' accepts any non-empty command line, and the
 # group name is taken from the {{.Comm}} template variable.
 - name: "{{.Comm}}"
   cmdline:
     - '.+'

4 配置Prometheus

 # Scrape job for process-exporter; add it to the scrape_configs list in
 # prometheus.yml (re-indent to match the existing entries there).
 # All keys of the list item must start in the same column as `job_name`;
 # a deeper-indented sibling key is a YAML syntax error.
 - job_name: test05进程监控
   scrape_interval: 2m
   scrape_timeout: 120s
   # Targets and their labels are read from this file-based service discovery file.
   file_sd_configs:
     - files:
         - /usr/local/prometheus/sd_config/test05-process.json

cat /usr/local/prometheus/sd_config/test05-process.json
[
  {
    "labels": {
      "desc": "lrma",
      "group": "lrma",
      "host_ip": "192.168.11.55",
      "hostname": "test05"
    },
    "targets": [
      "192.168.11.55:9256"
    ]
  },
  {
    "labels": {
      "desc": "lrma",
      "group": "lrma",
      "host_ip": "192.168.11.56",
      "hostname": "test06"
    },
    "targets": [
      "192.168.11.56:9256"
    ]
  }
]

重启prometheus

systemctl restart prometheus

5 接入Grafana图形化展示

输入导入的模板id 249,数据源选择Prometheus

6 设置告警

groups:
  - name: redis-server进程挂了
    rules:
      # Fires when the redis-server process group has had zero processes for 1m.
      # The groupname value must equal the group name that process-exporter
      # actually exports (check its /metrics output), otherwise the expression
      # selects nothing and the alert can never fire.
      - alert: redis-server进程挂了
        expr: (namedprocess_namegroup_num_procs{groupname="redis-server"}) == 0
        for: 1m
        labels:
          severity: error
          status: 非常严重
        annotations:
          # Alert templates read labels and the sample value through the
          # $labels and $value variables; bare `labels` / `.value` do not resolve.
          summary: "{{ $labels.hostname }} 上redis-server Alert {{ $labels.instance }}  has been down for more than 1 minutes"
          description: "redis-server 进程挂了,  当前进程值为 {{ $value }}"
  • 我的微信
  • 这是我的微信扫一扫
  • weinxin
  • 我的微信公众号
  • 我的微信公众号扫一扫
  • weinxin
avatar

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: