網站首頁 編程語言 正文
docker-compose-monitor.yml
# Monitoring stack: InfluxDB + Telegraf next to Prometheus, Alertmanager,
# Grafana, node-exporter and cAdvisor.
version: '2'

networks:
  monitor:
    driver: bridge

services:
  influxdb:
    image: influxdb:latest
    container_name: tig-influxdb
    ports:
      - "18083:8083"
      - "18086:8086"
      - "18090:8090"
    env_file:
      - 'env.influxdb'
    volumes:
      # Data persistency
      # sudo mkdir -p ./influxdb/data
      - ./influxdb/data:/var/lib/influxdb
      # Set the time inside the container to UTC+8
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped  # restart automatically unless explicitly stopped

  telegraf:
    image: telegraf:latest
    container_name: tig-telegraf
    links:
      - influxdb
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped

  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      - /home/qa/docker/grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/qa/docker/grafana/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - '9090:9090'
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - /home/qa/docker/grafana/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - '9093:9093'
    networks:
      - monitor

  grafana:
    image: grafana/grafana:6.7.4
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - '13000:3000'
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    # Expose the host's root filesystem and PID namespace to the exporter.
    # Without them the filesystem collectors report the container's own
    # mounts instead of the host's, which makes node_filesystem_* based
    # disk alerts unreliable.
    command:
      - '--path.rootfs=/host'
    pid: host
    volumes:
      - '/:/host:ro,rslave'
    ports:
      - '9100:9100'
    networks:
      - monitor

  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - '18080:8080'
    networks:
      - monitor
alertmanager.yml
# Alertmanager configuration: every alert is e-mailed to a single receiver.
global:
  resolve_timeout: 5m

  # Outgoing SMTP relay. NOTE(review): port 25 with smtp_require_tls disabled
  # means the connection is not forced onto TLS - confirm this is intended.
  smtp_smarthost: 'smtp.exmail.qq.com:25'
  smtp_hello: 'qq.com'
  smtp_require_tls: false

  # Sender identity and credentials. The Chinese values appear to be
  # placeholders to be replaced with a real mailbox and password.
  smtp_from: '郵箱'
  smtp_auth_username: '郵箱'
  smtp_auth_password: '密碼'

# Root route: group by alert name and deliver everything to 'email'.
route:
  receiver: 'email'
  group_by:
    - 'alertname'
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m

receivers:
  - name: 'email'
    email_configs:
      - to: '收件郵箱'
        # Also notify when an alert is resolved.
        send_resolved: true

# A critical alert mutes warning alerts that carry the same
# alertname, dev and instance labels.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal:
      - 'alertname'
      - 'dev'
      - 'instance'
prometheus.yml
# Prometheus server configuration.
global:
  # Scrape every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '192.168.32.117:9093'
            # - alertmanager:9093

# Rule files are loaded once and evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
  - "node_down.yml"
  # - "node-exporter-alert-rules.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape jobs. The job name is added as a label `job=<job_name>` to any
# time series scraped from that job.
scrape_configs:
  # IO storage node group
  - job_name: 'io'
    scrape_interval: 8s
    static_configs:
      # The port is the one node-exporter was started on.
      - targets:
          - '192.168.32.117:9100'
          - '192.168.32.196:9100'
          - '192.168.32.136:9100'
          - '192.168.32.193:9100'
          - '192.168.32.153:9100'
          - '192.168.32.185:9100'
          - '192.168.32.190:19100'
          - '192.168.32.192:9100'

  - job_name: 'cadvisor'
    static_configs:
      # The port is the one cAdvisor was started on.
      - targets:
          - '192.168.32.117:18080'
          - '192.168.32.193:8080'
          - '192.168.32.153:8080'
          - '192.168.32.185:8080'
          - '192.168.32.190:18080'
          - '192.168.32.192:18080'
node_down.yml
# Prometheus alerting rules for host availability, memory, disk and CPU.
groups:
  - name: node_down
    rules:
      # A scrape target has reported up == 0 for one minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          user: test
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes.'

      # Less than 10% of memory is available.
      # The alert name is quoted because it ends in '%'.
      - alert: '剩余內存小于10%'
        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of memory (instance {{ $labels.instance }})'
          description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Less than 10% of disk space is available; read-only filesystems
      # are excluded by the `and ON (...)` clause.
      - alert: '剩余磁盤小于10%'
        # Folded scalar: the two lines below are joined with a single space.
        expr: >-
          (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
          and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Host out of disk space (instance {{ $labels.instance }})'
          description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Non-idle CPU share above 80%, averaged per instance over 2 minutes.
      # `for: 0m` means the alert fires as soon as the expression is true.
      - alert: 'CPU負載 > 80%'
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: 'Host high CPU load (instance {{ $labels.instance }})'
          description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
告警:https://awesome-prometheus-alerts.grep.to/rules#prometheus-self-monitoring
官網儀表盤:https://grafana.com/grafana/dashboards/
原文鏈接:https://www.cnblogs.com/runzhao/p/15716274.html
相關推薦
- 2022-07-13 w2ui fixedBody 屬性
- 2022-10-07 android studio后臺服務使用詳解_Android
- 2022-12-15 Native Memory Tracking追蹤區域示例分析_React
- 2022-03-03 GitHub 私人private倉庫添加成員(協作者Collaborators)
- 2022-12-07 C++ IO設備讀寫功能實現詳解_C 語言
- 2022-11-02 Rust指南枚舉類與模式匹配詳解_Rust語言
- 2022-12-15 conda創建環境過程出現"Solving environment: failed"報錯的詳細解決方
- 2023-01-17 Qt中控件的函數使用教程分享_C 語言
- 最近更新
-
- window11 系統(tǒng)安裝 yarn
- 超詳細win安裝深度學習環境2025年最新版(
- Linux 中運行的top命令 怎么退出?
- MySQL 中decimal 的用法? 存儲小
- get 、set 、toString 方法的使
- @Resource和 @Autowired注解
- Java基礎操作-- 運算符,流程控制 Flo
- 1. Int 和Integer 的區別,Jav
- spring @retryable不生效的一種
- Spring Security之認證信息的處理
- Spring Security之認證過濾器
- Spring Security概述快速入門
- Spring Security之配置體系
- 【SpringBoot】SpringCache
- Spring Security之基于方法配置權
- redisson分布式鎖中waittime的設
- maven:解決release錯誤:Artif
- restTemplate使用總結
- Spring Security之安全異常處理
- MybatisPlus優雅實現加密?
- Spring ioc容器與Bean的生命周期。
- 【探索SpringCloud】服務發現-Nac
- Spring Security之基于HttpR
- Redis 底層數據結構-簡單動態字符串(SD
- arthas操作spring被代理目標對象命令
- Spring中的單例模式應用詳解
- 聊聊消息隊列,發送消息的4種方式
- bootspring第三方資源配置管理
- GIT同步修改后的遠程分支