添加节点监控
[root@10 prometheus]# vim prometheus.yml
  - job_name: 'node_exporter_采集器'
    metrics_path: /probe
    params:
      module: [tcp_connect]
    static_configs:
      - targets: ['10.10.0.175:9100']
      - targets: ['10.10.0.176:9100']
      - targets: ['10.10.0.177:9100']
      - targets: ['10.10.0.178:9100']
      - targets: ['10.10.0.179:9100']
      - targets: ['10.10.0.180:9100']
      - targets: ['10.10.0.187:9100']
      - targets: ['10.10.0.186:9100']
      - targets: ['10.10.0.189:9100']
      - targets: ['10.10.0.190:9100']
      - targets: ['10.10.0.188:9100']
      - targets: ['10.10.0.191:9100']
      - targets: ['10.10.0.192:9100']
      - targets: ['10.10.0.193:9100']
      - targets: ['10.10.0.195:9100']
      - targets: ['10.10.0.194:9100']
      - targets: ['10.10.0.196:9100']
      - targets: ['10.10.0.197:9100']
      - targets: ['10.10.0.198:9100']
      - targets: ['10.10.0.199:9100']
      - targets: ['10.10.0.200:9100']
      - targets: ['10.10.0.201:9100']
      - targets: ['10.10.0.202:9100']
      - targets: ['10.10.0.208:9100']
      - targets: ['10.10.0.210:9100']
      - targets: ['10.10.0.208:9100']
      - targets: ['10.10.0.207:9100']
      - targets: ['10.10.0.209:9100']
      - targets: ['10.10.0.212:9100']
      - targets: ['10.10.0.211:9100']
      - targets: ['10.10.0.216:9100']
      - targets: ['10.11.0.217:9100']
      - targets: ['10.10.0.218:9100']
      - targets: ['10.10.0.219:9100']
      - targets: ['10.10.0.220:9100']
      - targets: ['10.10.0.222:9100']
      - targets: ['10.10.0.221:9100']
        labels:
          instance: 'port_status'
          group: 'node_exporter'
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: 10.11.0.217:9115
检查配置文件
[root@10 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  SUCCESS: 1 rule files found
Checking rules.yml
  SUCCESS: 14 rules found
[root@10 prometheus]# systemctl reload prometheus.service
报警规则
[root@10 prometheus]# vim rules.yml
groups:
  - name: port
    rules:
      - alert: 应用端口监控  # 告警名称
        expr: probe_success == 0  # 告警的判定条件,参考Prometheus高级查询来设定
        for: 1s  # 满足告警条件持续时间多久后,才会发送告警
        labels:  # 标签项
          status: 严重
        annotations:  # 解析项,详细解释告警信息
          summary: "group:{{$labels.instance}} High Node port has been down "
          description: "group:{{$labels.instance}} 服务已停止使用 "
检查配置文件
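[root@10 prometheus]# ./promtool check config prometheus.yml
Checking prometheus.yml
  SUCCESS: 1 rule files found
Checking rules.yml
  SUCCESS: 14 rules found
# 报警规则由 Prometheus 加载,修改 rules.yml 后需要重新加载 Prometheus 才会生效
[root@10 prometheus]# systemctl reload prometheus.service
[root@10 prometheus]# systemctl restart alertmanager.service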
继续阅读
- 我的QQ
- QQ扫一扫
-
- 我的头条
- 头条扫一扫
-
评论