This commit is contained in:
linge
2023-03-13 23:23:34 +08:00
commit 8c1d7aa660
5 changed files with 185 additions and 0 deletions

12
prometheus/alert.yml Normal file
View File

@ -0,0 +1,12 @@
groups:
- name: Prometheus alert
rules:
# 对任何实例超过30秒无法联系的情况发出警报
- alert: 服务告警
expr: up == 0
for: 30s
labels:
severity: critical
annotations:
instance: "{{ $labels.instance }}"
description: "{{ $labels.job }} 服务已关闭"

36
prometheus/prometheus.yml Normal file
View File

@ -0,0 +1,36 @@
# 全局配置
global:
scrape_interval: 15s # 将搜刮间隔设置为每15秒一次。默认是每1分钟一次。
evaluation_interval: 15s # 每15秒评估一次规则。默认是每1分钟一次。
# Alertmanager 配置
alerting:
alertmanagers:
- static_configs:
- targets: ['alertmanager:9093']
# 报警(触发器)配置
rule_files:
- "alert.yml"
# 搜刮配置
scrape_configs:
- job_name: 'prometheus'
# 覆盖全局默认值每15秒从该作业中刮取一次目标
scrape_interval: 15s
static_configs:
- targets: ['localhost:9090']
- job_name: 'cadvisor'
scrape_interval: 15s
static_configs:
- targets: ['cadvisor:8080']
labels:
instance: Prometheus服务器
- job_name: 'node-exporter'
scrape_interval: 15s
static_configs:
- targets: ['node_exporter:9100']
labels:
instance: Prometheus服务器