
update

branch: master
commit: 519999b6ab
author: F2256342 (Daniel de Oliveira Carvalho), 4 years ago
Changed files:
  1. alert.rules.yml        (2 changed lines)
  2. alertmanager.yml       (114 changed lines)
  3. docker-compose.bb.yml  (33 changed lines)
  4. docker-compose.yml     (46 changed lines)
  5. prometheus.yml         (26 changed lines)

alert.rules.yml  (2 changed lines)

@@ -5,7 +5,7 @@ groups:
     expr: up == 0
     for: 1m
     labels:
-      severity: "critical"
+      severity: critical
     annotations:
       summary: "Endpoint {{ $labels.instance }} down"
       description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."

alertmanager.yml  (114 changed lines)

@@ -1,107 +1,15 @@
-global:
-  # The smarthost and SMTP sender used for mail notifications.
-  smtp_smarthost: 'localhost:25'
-  smtp_from: 'alertmanager@example.org'
-
-# The root route on which each incoming alert enters.
+---
 route:
-  # The root route must not have any matchers as it is the entry point for
-  # all alerts. It needs to have a receiver configured so alerts that do not
-  # match any of the sub-routes are sent to someone.
-  receiver: 'team-X-mails'
-
-  # The labels by which incoming alerts are grouped together. For example,
-  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
-  # be batched into a single group.
-  #
-  # To aggregate by all possible labels use '...' as the sole label name.
-  # This effectively disables aggregation entirely, passing through all
-  # alerts as-is. This is unlikely to be what you want, unless you have
-  # a very low alert volume or your upstream notification system performs
-  # its own grouping. Example: group_by: [...]
-  group_by: ['alertname', 'cluster']
-
-  # When a new group of alerts is created by an incoming alert, wait at
-  # least 'group_wait' to send the initial notification.
-  # This way ensures that you get multiple alerts for the same group that start
-  # firing shortly after another are batched together on the first
-  # notification.
+  receiver: telegram.bot
+  group_by: [...]
   group_wait: 30s
-
-  # When the first notification was sent, wait 'group_interval' to send a batch
-  # of new alerts that started firing for that group.
-  group_interval: 5m
-
-  # If an alert has successfully been sent, wait 'repeat_interval' to
-  # resend them.
-  repeat_interval: 3h
-
-  # All the above attributes are inherited by all child routes and can
-  # overwritten on each.
-
-  # The child route trees.
-  routes:
-    # This routes performs a regular expression match on alert labels to
-    # catch alerts that are related to a list of services.
-    - match_re:
-        service: ^(foo1|foo2|baz)$
-      receiver: team-X-mails
-      # The service has a sub-route for critical alerts, any alerts
-      # that do not match, i.e. severity != critical, fall-back to the
-      # parent node and are sent to 'team-X-mails'
-      routes:
-        - match:
-            severity: critical
-          receiver: team-X-pager
-    - match:
-        service: files
-      receiver: team-Y-mails
-      routes:
-        - match:
-            severity: critical
-          receiver: team-Y-pager
-    # This route handles all alerts coming from a database service. If there's
-    # no team to handle it, it defaults to the DB team.
-    - match:
-        service: database
-      receiver: team-DB-pager
-      # Also group alerts by affected database.
-      group_by: [alertname, cluster, database]
-      routes:
-        - match:
-            owner: team-X
-          receiver: team-X-pager
-        - match:
-            owner: team-Y
-          receiver: team-Y-pager
-
-# Inhibition rules allow to mute a set of alerts given that another alert is
-# firing.
-# We use this to mute any warning-level notifications if the same alert is
-# already critical.
-inhibit_rules:
-  - source_matchers:
-      - severity="critical"
-    target_matchers:
-      - severity="warning"
-    # Apply inhibition if the alertname is the same.
-    # CAUTION:
-    #   If all label names listed in `equal` are missing
-    #   from both the source and target alerts,
-    #   the inhibition rule will apply!
-    equal: ['alertname']
+  group_interval: 1m
+  repeat_interval: 1h
 receivers:
-  - name: 'telegram-bot'
+  - name: telegram.bot
     telegram_configs:
-      bot_token: "5209410321:AAGqy6WrQZQRQ0qx0pww8K6KPqzXIeRRosA"
-      chat_id: 65498889
+      - api_url: https://api.telegram.org
+        bot_token: 5209410321:AAGqy6WrQZQRQ0qx0pww8K6KPqzXIeRRosA
+        chat_id: 65498889
+        parse_mode: HTML
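
The rewritten receiver block can be sanity-checked locally before the alertmanager container is restarted. A minimal sketch, assuming amtool is available (it ships in the prom/alertmanager image and in the upstream release tarballs):

    amtool check-config alertmanager.yml

This parses the route tree and the telegram_configs entry and reports schema errors without sending anything.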

docker-compose.bb.yml  (33 changed lines; file deleted)

@@ -1,33 +0,0 @@
-services:
-  node-exporter:
-    container_name: node-exporter
-    image: prom/node-exporter:latest
-    network_mode: host
-
-  prometheus:
-    container_name: prometheus
-    image: prom/prometheus:latest
-    network_mode: host
-    volumes:
-      - /home/f2256342/forge/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
-      - ./alert.rules.yml:/etc/prometheus/alert.rules.yml
-
-  grafana:
-    container_name: grafana
-    image: grafana/grafana:latest
-    network_mode: host
-
-  data-generator:
-    container_name: data-generator
-    build:
-      context: ../prometheus-data-generator/
-    network_mode: host
-    volumes:
-      - /home/f2256342/forge/prometheus-data-generator/config.yml:/config.yml
-
-  alertmanager:
-    container_name: alertmanager
-    image: prom/alertmanager
-    network_mode: host
-    volumes:
-      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml

docker-compose.yml  (46 changed lines)

@@ -2,44 +2,32 @@ services:
   node-exporter:
     container_name: node-exporter
     image: prom/node-exporter:latest
-    network_mode: bridge
-    ports:
-      - "9100:9100"
+    network_mode: host

   grafana:
     container_name: grafana
     image: grafana/grafana:latest
-    network_mode: bridge
-    ports:
-      - "9080:3000"
+    network_mode: host

-  data-generator:
-    container_name: data-generator
-    build:
-      context: ../prometheus-data-generator/
-    network_mode: bridge
-    ports:
-      - "9000:9000"
-    volumes:
-      # - ../prometheus-data-generator/config.yml:/config.yml
-      - /home/f2256342/forge/prometheus-data-generator/config.yml:/config.yml
-    command: --no-collector.rapl
+  # data-generator:
+  #   container_name: data-generator
+  #   build:
+  #     context: ../prometheus-data-generator/
+  #   network_mode: host
+  #   volumes:
+  #     - /home/f2256342/forge/prometheus-data-generator/config.yml:/config.yml

   prometheus:
     container_name: prometheus
     image: prom/prometheus:latest
-    depends_on:
-      - node-exporter
-      - data-generator
-    network_mode: bridge
-    ports:
-      - "9090:9090"
-    env_file:
-      - ./.env
+    network_mode: host
     volumes:
-      # - /home/yutsuo/forge/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
       - /home/f2256342/forge/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
-    # command: --enable-feature=expand-external-labels --config.file=/etc/prometheus/prometheus.yml
+      - ./alert.rules.yml:/etc/prometheus/alert.rules.yml
+
+  alertmanager:
+    container_name: alertmanager
+    image: prom/alertmanager
+    network_mode: host
+    volumes:
+      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
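
With several services moving to network_mode: host, it is worth confirming the merged file still parses before bringing the stack back up. A quick check, assuming a Docker CLI with the compose plugin:

    docker compose -f docker-compose.yml config

This only renders and validates the file; it does not start any containers.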

prometheus.yml  (26 changed lines)

@@ -1,37 +1,37 @@
 global:
   scrape_interval: 15s
   external_labels:
-    monitor: "codelab-monitor"
+    monitor: codelab-monitor

 rule_files:
   - alert.rules.yml

 alerting:
   alertmanagers:
     - static_configs:
         - targets:
-            - "localhost:9093"
+            - localhost:9093

 scrape_configs:
-  - job_name: "cfe-acesso"
-    scheme: "https"
+  - job_name: cfe-acesso
+    scheme: https
     scrape_interval: 5s
     static_configs:
-      - targets: ["mobi2.bb.com.br"]
-    metrics_path: "/cfe-acesso/api/v1/info/metrics"
+      - targets: [mobi2.bb.com.br]
+    metrics_path: /cfe-acesso/api/v1/info/metrics

-  - job_name: "node-exporter"
+  - job_name: node-exporter
     scrape_interval: 5s
     static_configs:
-      - targets: ["localhost:9100"]
+      - targets: [localhost:9100]

-  - job_name: "prometheus"
+  - job_name: prometheus
     relabel_configs:
     scrape_interval: 5s
     static_configs:
-      - targets: ["localhost:9090"]
+      - targets: [localhost:9090]

-  - job_name: "data-exporter"
+  - job_name: data-exporter
     scrape_interval: 5s
     static_configs:
-      - targets: ["localhost:9000"]
+      - targets: [localhost:9000]
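
The Prometheus side can be validated the same way. A sketch assuming promtool is on hand (it ships in the prom/prometheus image); the first command also loads alert.rules.yml through rule_files:

    promtool check config prometheus.yml
    promtool check rules alert.rules.yml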
