5 changed files with 42 additions and 179 deletions
@ -1,107 +1,15 @@ |
|||||||
global: |
--- |
||||||
# The smarthost and SMTP sender used for mail notifications. |
|
||||||
smtp_smarthost: 'localhost:25' |
|
||||||
smtp_from: 'alertmanager@example.org' |
|
||||||
|
|
||||||
# The root route on which each incoming alert enters. |
|
||||||
route: |
route: |
||||||
# The root route must not have any matchers as it is the entry point for |
receiver: telegram.bot |
||||||
# all alerts. It needs to have a receiver configured so alerts that do not |
group_by: [...] |
||||||
# match any of the sub-routes are sent to someone. |
|
||||||
receiver: 'team-X-mails' |
|
||||||
|
|
||||||
# The labels by which incoming alerts are grouped together. For example, |
|
||||||
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would |
|
||||||
# be batched into a single group. |
|
||||||
# |
|
||||||
# To aggregate by all possible labels use '...' as the sole label name. |
|
||||||
# This effectively disables aggregation entirely, passing through all |
|
||||||
# alerts as-is. This is unlikely to be what you want, unless you have |
|
||||||
# a very low alert volume or your upstream notification system performs |
|
||||||
# its own grouping. Example: group_by: [...] |
|
||||||
group_by: ['alertname', 'cluster'] |
|
||||||
|
|
||||||
# When a new group of alerts is created by an incoming alert, wait at |
|
||||||
# least 'group_wait' to send the initial notification. |
|
||||||
# This ensures that multiple alerts for the same group that start |
|
||||||
# firing shortly after one another are batched together on the first |
|
||||||
# notification. |
|
||||||
group_wait: 30s |
group_wait: 30s |
||||||
|
group_interval: 1m |
||||||
# When the first notification was sent, wait 'group_interval' to send a batch |
repeat_interval: 1h |
||||||
# of new alerts that started firing for that group. |
|
||||||
group_interval: 5m |
|
||||||
|
|
||||||
# If an alert has successfully been sent, wait 'repeat_interval' to |
|
||||||
# resend it. |
|
||||||
repeat_interval: 3h |
|
||||||
|
|
||||||
# All the above attributes are inherited by all child routes and can |
|
||||||
# be overwritten on each. |
|
||||||
|
|
||||||
# The child route trees. |
|
||||||
routes: |
|
||||||
# This route performs a regular expression match on alert labels to |
|
||||||
# catch alerts that are related to a list of services. |
|
||||||
- match_re: |
|
||||||
service: ^(foo1|foo2|baz)$ |
|
||||||
receiver: team-X-mails |
|
||||||
|
|
||||||
# The service has a sub-route for critical alerts, any alerts |
|
||||||
# that do not match, i.e. severity != critical, fall-back to the |
|
||||||
# parent node and are sent to 'team-X-mails' |
|
||||||
routes: |
|
||||||
- match: |
|
||||||
severity: critical |
|
||||||
receiver: team-X-pager |
|
||||||
|
|
||||||
- match: |
|
||||||
service: files |
|
||||||
receiver: team-Y-mails |
|
||||||
|
|
||||||
routes: |
|
||||||
- match: |
|
||||||
severity: critical |
|
||||||
receiver: team-Y-pager |
|
||||||
|
|
||||||
# This route handles all alerts coming from a database service. If there's |
|
||||||
# no team to handle it, it defaults to the DB team. |
|
||||||
- match: |
|
||||||
service: database |
|
||||||
|
|
||||||
receiver: team-DB-pager |
|
||||||
# Also group alerts by affected database. |
|
||||||
group_by: [alertname, cluster, database] |
|
||||||
|
|
||||||
routes: |
|
||||||
- match: |
|
||||||
owner: team-X |
|
||||||
receiver: team-X-pager |
|
||||||
|
|
||||||
- match: |
|
||||||
owner: team-Y |
|
||||||
receiver: team-Y-pager |
|
||||||
|
|
||||||
|
|
||||||
# Inhibition rules allow muting a set of alerts given that another alert is |
|
||||||
# firing. |
|
||||||
# We use this to mute any warning-level notifications if the same alert is |
|
||||||
# already critical. |
|
||||||
inhibit_rules: |
|
||||||
- source_matchers: |
|
||||||
- severity="critical" |
|
||||||
target_matchers: |
|
||||||
- severity="warning" |
|
||||||
# Apply inhibition if the alertname is the same. |
|
||||||
# CAUTION: |
|
||||||
# If all label names listed in `equal` are missing |
|
||||||
# from both the source and target alerts, |
|
||||||
# the inhibition rule will apply! |
|
||||||
equal: ['alertname'] |
|
||||||
|
|
||||||
|
|
||||||
receivers: |
receivers: |
||||||
- name: 'telegram-bot' |
- name: telegram.bot |
||||||
telegram_configs: |
telegram_configs: |
||||||
bot_token: "5209410321:AAGqy6WrQZQRQ0qx0pww8K6KPqzXIeRRosA" |
- api_url: https://api.telegram.org |
||||||
chat_id: 65498889 |
bot_token: 5209410321:AAGqy6WrQZQRQ0qx0pww8K6KPqzXIeRRosA |
||||||
|
chat_id: 65498889 |
||||||
|
parse_mode: HTML |
||||||
|
|||||||
@ -1,33 +0,0 @@ |
|||||||
services: |
|
||||||
node-exporter: |
|
||||||
container_name: node-exporter |
|
||||||
image: prom/node-exporter:latest |
|
||||||
network_mode: host |
|
||||||
|
|
||||||
prometheus: |
|
||||||
container_name: prometheus |
|
||||||
image: prom/prometheus:latest |
|
||||||
network_mode: host |
|
||||||
volumes: |
|
||||||
- /home/f2256342/forge/monitor/prometheus.yml:/etc/prometheus/prometheus.yml |
|
||||||
- ./alert.rules.yml:/etc/prometheus/alert.rules.yml |
|
||||||
|
|
||||||
grafana: |
|
||||||
container_name: grafana |
|
||||||
image: grafana/grafana:latest |
|
||||||
network_mode: host |
|
||||||
|
|
||||||
data-generator: |
|
||||||
container_name: data-generator |
|
||||||
build: |
|
||||||
context: ../prometheus-data-generator/ |
|
||||||
network_mode: host |
|
||||||
volumes: |
|
||||||
- /home/f2256342/forge/prometheus-data-generator/config.yml:/config.yml |
|
||||||
|
|
||||||
alertmanager: |
|
||||||
container_name: alertmanager |
|
||||||
image: prom/alertmanager |
|
||||||
network_mode: host |
|
||||||
volumes: |
|
||||||
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml |
|
||||||
@ -1,37 +1,37 @@ |
|||||||
global: |
global: |
||||||
scrape_interval: 15s |
scrape_interval: 15s |
||||||
external_labels: |
external_labels: |
||||||
monitor: "codelab-monitor" |
monitor: codelab-monitor |
||||||
|
|
||||||
rule_files: |
rule_files: |
||||||
- alert.rules.yml |
- alert.rules.yml |
||||||
|
|
||||||
alerting: |
alerting: |
||||||
alertmanagers: |
alertmanagers: |
||||||
- static_configs: |
- static_configs: |
||||||
- targets: |
- targets: |
||||||
- "localhost:9093" |
- localhost:9093 |
||||||
|
|
||||||
scrape_configs: |
scrape_configs: |
||||||
- job_name: "cfe-acesso" |
- job_name: cfe-acesso |
||||||
scheme: "https" |
scheme: https |
||||||
scrape_interval: 5s |
scrape_interval: 5s |
||||||
static_configs: |
static_configs: |
||||||
- targets: ["mobi2.bb.com.br"] |
- targets: [mobi2.bb.com.br] |
||||||
metrics_path: "/cfe-acesso/api/v1/info/metrics" |
metrics_path: /cfe-acesso/api/v1/info/metrics |
||||||
|
|
||||||
- job_name: "node-exporter" |
- job_name: node-exporter |
||||||
scrape_interval: 5s |
scrape_interval: 5s |
||||||
static_configs: |
static_configs: |
||||||
- targets: ["localhost:9100"] |
- targets: [localhost:9100] |
||||||
|
|
||||||
- job_name: "prometheus" |
- job_name: prometheus |
||||||
relabel_configs: |
relabel_configs: |
||||||
scrape_interval: 5s |
scrape_interval: 5s |
||||||
static_configs: |
static_configs: |
||||||
- targets: ["localhost:9090"] |
- targets: [localhost:9090] |
||||||
|
|
||||||
- job_name: "data-exporter" |
- job_name: data-exporter |
||||||
scrape_interval: 5s |
scrape_interval: 5s |
||||||
static_configs: |
static_configs: |
||||||
- targets: ["localhost:9000"] |
- targets: [localhost:9000] |
||||||
|
|||||||
Loading…
Reference in new issue