From 4ddbc9f256248a8827e0913a90c5dccbcd94b970 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Sat, 26 Feb 2022 11:01:20 -0600 Subject: [PATCH] hosts: Add mtrcs0.p.r *mtrcs0.pyrocufflink.red* is a Raspberry Pi CM4 on a Waveshare CM4-IO-BASE-B carrier board with an NVMe SSD. It runs a custom OS built using Buildroot, and is not a member of the *pyrocufflink.blue* AD domain. *mtrcs0.p.r* hosts Victoria Metrics/`vmagent`, `vmalert`, AlertManager, and Grafana. I've created a unique group and playbook for it, *metricspi*, to manage all these applications together. --- .../nginx/mtrcs0.pyrocufflink.red/server.cer | 1 + .../nginx/mtrcs0.pyrocufflink.red/server.key | 1 + group_vars/metricspi/alertmanager.yml | 24 ++ group_vars/metricspi/alerts.yml | 48 ++++ group_vars/metricspi/blackbox.yml | 45 ++++ group_vars/metricspi/grafana.yml | 18 ++ group_vars/metricspi/main.yml | 2 + group_vars/metricspi/scrape.yml | 231 ++++++++++++++++++ group_vars/metricspi/secrets | 24 ++ group_vars/metricspi/vmalert.yml | 4 + host_vars/mtrcs0.pyrocufflink.red.yml | 8 + hosts | 22 ++ metricspi.yml | 16 ++ roles/ssh-hostkeys/files/ssh_known_hosts | 3 + 14 files changed, 447 insertions(+) create mode 120000 certs/nginx/mtrcs0.pyrocufflink.red/server.cer create mode 120000 certs/nginx/mtrcs0.pyrocufflink.red/server.key create mode 100644 group_vars/metricspi/alertmanager.yml create mode 100644 group_vars/metricspi/alerts.yml create mode 100644 group_vars/metricspi/blackbox.yml create mode 100644 group_vars/metricspi/grafana.yml create mode 100644 group_vars/metricspi/main.yml create mode 100644 group_vars/metricspi/scrape.yml create mode 100644 group_vars/metricspi/secrets create mode 100644 group_vars/metricspi/vmalert.yml create mode 100644 host_vars/mtrcs0.pyrocufflink.red.yml create mode 100644 metricspi.yml diff --git a/certs/nginx/mtrcs0.pyrocufflink.red/server.cer b/certs/nginx/mtrcs0.pyrocufflink.red/server.cer new file mode 120000 index 0000000..c42aef2 --- /dev/null +++ 
b/certs/nginx/mtrcs0.pyrocufflink.red/server.cer @@ -0,0 +1 @@ +../../lego/_.pyrocufflink.net.crt \ No newline at end of file diff --git a/certs/nginx/mtrcs0.pyrocufflink.red/server.key b/certs/nginx/mtrcs0.pyrocufflink.red/server.key new file mode 120000 index 0000000..534b743 --- /dev/null +++ b/certs/nginx/mtrcs0.pyrocufflink.red/server.key @@ -0,0 +1 @@ +../../lego/_.pyrocufflink.net.key \ No newline at end of file diff --git a/group_vars/metricspi/alertmanager.yml b/group_vars/metricspi/alertmanager.yml new file mode 100644 index 0000000..52ad2b7 --- /dev/null +++ b/group_vars/metricspi/alertmanager.yml @@ -0,0 +1,24 @@ +alertmanager_config: + global: + smtp_from: prometheus@pyrocufflink.blue + smtp_smarthost: mail.pyrocufflink.blue:25 + smtp_require_tls: false + + route: + receiver: default-email + group_by: + - ... + routes: + - receiver: default-email + group_wait: 1m + group_by: + - alertname + repeat_interval: 120h + match: + job: homeassistant + + receivers: + - name: default-email + email_configs: + - to: gyrfalcon@ebonfire.com + send_resolved: true diff --git a/group_vars/metricspi/alerts.yml b/group_vars/metricspi/alerts.yml new file mode 100644 index 0000000..a084eaa --- /dev/null +++ b/group_vars/metricspi/alerts.yml @@ -0,0 +1,48 @@ +vmalert_rules: + groups: + - name: default alert + rules: + - alert: DiskUsage + expr: >- + sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df!="var-log"}) by (instance, df) > .75 + or sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df="var-log"}) by (instance, df) > .95 + for: 2h + - alert: TheWebsiteIsDown + expr: >- + probe_success{job="websites"} == 0 + for: 10m + - alert: Missing Metrics + expr: >- + up{instance!~"vmhost.*"} == 0 + for: 10m + - alert: NUT is offline + expr: >- + absent(collectd_nut_percent) + + - name: Bitwarden + rules: + - alert: vaultwarden is not running + expr: >- + 
collectd_processes_ps_count_processes{processes="vaultwarden"} < 1 + for: 5m + + - name: Active Directory + rules: + - alert: samba is not running + expr: >- + collectd_processes_ps_count_processes{processes=~"samba|smbd|winbindd|krb5kdc"} < 1 + for: 5m + + - name: Graylog + rules: + - alert: unprocessed messages + expr: >- + org_graylog2_journal_entries_uncommitted > 100 + for: 1h + + - name: mdraid + rules: + - alert: mdraid missing disk + expr: collectd_md_md_disks{type="missing"} != 0 + - alert: mdraid failed disk + expr: collectd_md_md_disks{type="failed"} != 0 diff --git a/group_vars/metricspi/blackbox.yml b/group_vars/metricspi/blackbox.yml new file mode 100644 index 0000000..be67e6d --- /dev/null +++ b/group_vars/metricspi/blackbox.yml @@ -0,0 +1,45 @@ +blackbox_modules: + icmp: + prober: icmp + timeout: 5s + + http: + prober: http + timeout: 5s + http: + method: GET + headers: + Accept-Language: en-US + Accept-Charset: utf-8 + Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 + + tcp: + prober: tcp + timeout: 5s + + dns_recursive: + prober: dns + timeout: 5s + dns: + query_name: news.ycombinator.com + query_type: A + + dns_pyrocufflink: + prober: dns + timeout: 2s + dns: + query_name: pyrocufflink.blue + query_type: SOA + validate_answer_rrs: + fail_if_not_matches_regexp: + - 'pyrocufflink\.blue\.\t\d+\tIN\tSOA\tdc\d+\.pyrocufflink\.blue.*' + + smtp: + prober: tcp + timeout: 5s + tcp: + query_response: + - expect: "^220 ([^ ]+) ESMTP (.+)$" + - send: "EHLO prober\r" + - expect: '^250[ -]SMTPUTF8' + - send: "QUIT\r" diff --git a/group_vars/metricspi/grafana.yml b/group_vars/metricspi/grafana.yml new file mode 100644 index 0000000..cf08a53 --- /dev/null +++ b/group_vars/metricspi/grafana.yml @@ -0,0 +1,18 @@ +grafana_allow_anonymous: true +grafana_allow_sign_up: false +grafana_allow_org_create: false +grafana_ldap: true +grafana_ldap_allow_sign_up: true +grafana_ldap_host: dc2.pyrocufflink.blue +grafana_ldap_use_ssl: true 
+grafana_ldap_root_ca_cert: /etc/ssl/certs/dch-root-ca.crt +grafana_ldap_bind_dn: CN=svc.grafana,CN=Users,DC=pyrocufflink,DC=blue +grafana_ldap_bind_password: '{{ vault_grafana_ldap_bind_password }}' +grafana_ldap_search_filter: '(sAMAccountName=%s)' +grafana_ldap_base_dn: DC=pyrocufflink,DC=blue +grafana_ldap_group_mapping: +- group_dn: CN=Grafana Admins,CN=Users,DC=pyrocufflink,DC=blue + role: Admin + grafana_admin: true +- group_dn: '*' + role: Viewer diff --git a/group_vars/metricspi/main.yml b/group_vars/metricspi/main.yml new file mode 100644 index 0000000..02f43fd --- /dev/null +++ b/group_vars/metricspi/main.yml @@ -0,0 +1,2 @@ +vm_config: null +victoria_metrics_server_name: metrics.pyrocufflink.blue diff --git a/group_vars/metricspi/scrape.yml b/group_vars/metricspi/scrape.yml new file mode 100644 index 0000000..2faf11c --- /dev/null +++ b/group_vars/metricspi/scrape.yml @@ -0,0 +1,231 @@ +vmagent_scrape_configs: + +- job_name: vmagent + static_configs: + - targets: + - '[::1]:8429' + +- job_name: speedtest + scrape_interval: 10m + scrape_timeout: 1m + metrics_path: /probe + static_configs: + - targets: + - 172.30.0.1:9516 + +- job_name: blackbox + metrics_path: /probe + params: + module: + - icmp + static_configs: + - targets: + - 1.1.1.1 + - 8.8.8.8 + - 9.9.9.9 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: brandon + scrape_interval: 5s + metrics_path: /probe + params: + module: + - icmp + static_configs: + - targets: + - 173.172.96.1 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: websites + scrape_interval: 5m + metrics_path: /probe + params: + module: + - http + static_configs: + - targets: + - 
http://dustin.hatch.name/ + - https://darkchestofwonders.us/ + - http://nratonpass.com/ + - http://pyrocufflink.net/ + - http://ebonfire.com/ + - http://chmod777.sh/ + - https://hatch.chat/_matrix/client/versions + - https://nextcloud.pyrocufflink.net/ + - https://bitwarden.pyrocufflink.blue/ + - https://git.pyrocufflink.blue/ + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: sensors + scrape_interval: 10s + file_sd_configs: + - files: + - sensors.yml + +- job_name: graylog + scrape_interval: 1m + scheme: https + metrics_path: /api/plugins/org.graylog.plugins.metrics.prometheus/metrics + basic_auth: + username: >- + {{ vault_graylog_scrape_token }} + password: token + static_configs: + - targets: + - graylog.pyrocufflink.blue:443 + +- job_name: collectd + scrape_interval: 10s + honor_labels: true + static_configs: + - targets: + - gw1.pyrocufflink.blue + - vmhost0.pyrocufflink.blue + - vmhost1.pyrocufflink.blue + file_sd_configs: + - files: + - /etc/prometheus/scrape-collectd.yml + relabel_configs: + - source_labels: [__address__] + target_label: __address__ + replacement: '$1:9103' + +- job_name: homeassistant + scrape_interval: 1m + scheme: https + metrics_path: /api/prometheus + bearer_token: >- + {{ vault_homeassistant_scrape_token }} + static_configs: + - targets: + - homeassistant.pyrocufflink.blue + +- job_name: sambadc + scrape_interval: 1m + metrics_path: /probe + params: + module: + - tcp + dns_sd_configs: + - names: + - _ldap._tcp.pyrocufflink.blue + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: gitea + scrape_interval: 1m + scheme: https + static_configs: + - targets: + - git.pyrocufflink.blue + +- job_name: synapse + 
scrape_interval: 1m + metrics_path: /_synapse/metrics + static_configs: + - targets: + - matrix0.pyrocufflink.blue + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__address__] + target_label: __address__ + replacement: '$1:9000' + +- job_name: dns_recursive + scrape_interval: 1m + metrics_path: /probe + params: + module: + - dns_recursive + static_configs: + - targets: + - 172.30.0.4 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: dns_pyrocufflink + scrape_interval: 1m + metrics_path: /probe + params: + module: + - dns_pyrocufflink + static_configs: + - targets: + - 172.30.0.10 + - 172.30.0.9 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: smtp + scrape_interval: 1m + metrics_path: /probe + params: + module: + - smtp + dns_sd_configs: + - names: + - mail.pyrocufflink.blue + type: A + port: 25 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: '[::1]:9115' + +- job_name: unifi + scrape_interval: 1m + static_configs: + - targets: + - unifi.pyrocufflink.blue:9130 + +- job_name: jenkins + scrape_interval: 1m + metrics_path: /prometheus/ + scheme: https + static_configs: + - targets: + - jenkins.pyrocufflink.blue + +- job_name: burp + scrape_interval: 270s + scrape_timeout: 30s + static_configs: + - targets: + - burp.pyrocufflink.blue:9645 diff --git a/group_vars/metricspi/secrets b/group_vars/metricspi/secrets new file mode 100644 index 0000000..709dfcd --- /dev/null +++ b/group_vars/metricspi/secrets @@ -0,0 +1,24 @@ +$ANSIBLE_VAULT;1.1;AES256 
+33663232353935666561613765313939376662396331326332373133343437343663323761633066 +3361353962646561633066353632373139666562323863330a346339323831376664636366336332 +63333662303862633938643734366662623434633562383033663637336330306161613336346135 +3566386161363438660a393436353461366433643738313661656434303462376436626162623061 +30626361623737636337383336626331633332393233656263303731313239663838313635356432 +34333733636139633766316162333266613533636234303830326632303765616330663034636430 +63373866373662383438636563643833323765653666663837663162356464326332396138656233 +33326465313437343232316132613538626537333332613531613332343566626337613835313639 +61353464633239353863313561646235316466393762306238306538376531353831643930336231 +34643366633664633937656232316166393835333461363564303938623633653432316561323032 +65666330316534306564653731366632613934343966653034393661363035356639656662613339 +36373131346561343731666331623833633932373765356263363434666566616131663833656364 +32663136303838376537346636633363666630386339633564633662343035653737616439626163 +63363234613237656639333035396539626337323932626632666663393864613063306138323465 +34343761613637656631323938323465376535613461313231323737636235396535363234646437 +34633230333236326331333738323161376230373766393336643636376339396530316632363638 +65373530613565663236666561326539343033633230616561633538313632343036346261323134 +65663166353632656635653365366664313139396562336133656336396334363063653332393136 +32663838363237626562366662383035333762366432323734616633346334646365323733303763 +38663461333431613630303330343764633963646432363537616466626133346136623535656539 +39353536633364653563366466363338643730663866626531653164663232323663653366363266 +62363535363835336262646236346637633033353731666335373663663537356362656362626265 +643234623230343334656464633134326136 diff --git a/group_vars/metricspi/vmalert.yml b/group_vars/metricspi/vmalert.yml new file mode 100644 index 0000000..edec6d6 --- /dev/null +++ 
b/group_vars/metricspi/vmalert.yml @@ -0,0 +1,4 @@ +vmalert_datasource_url: http://[::1]:8428 +vmalert_notifier_url: http://[::1]:9093 +vmalert_remote_read_url: http://[::1]:8428 +vmalert_remote_write_url: http://[::1]:8428 diff --git a/host_vars/mtrcs0.pyrocufflink.red.yml b/host_vars/mtrcs0.pyrocufflink.red.yml new file mode 100644 index 0000000..de12bf8 --- /dev/null +++ b/host_vars/mtrcs0.pyrocufflink.red.yml @@ -0,0 +1,8 @@ +ansible_user: root +ansible_become: false + +host_uses_firewalld: false +collectd_unixsock_plugin: false + +collectd_plugins: + thermal: true diff --git a/hosts b/hosts index 092b279..2149f5c 100644 --- a/hosts +++ b/hosts @@ -1,12 +1,18 @@ [all:vars] ansible_python_interpreter=/usr/bin/python3 +[alertmanager:children] +metricspi + [aria2] file0.pyrocufflink.blue [bitwarden_rs] bw0.pyrocufflink.blue +[blackbox-exporter:children] +metricspi + [burp-client] bw0.pyrocufflink.blue cloud0.pyrocufflink.blue @@ -18,6 +24,9 @@ burp1.pyrocufflink.blue [certbot] +[collectd] +mtrcs0.pyrocufflink.red + [collectd:children] pyrocufflink @@ -53,6 +62,7 @@ nvr1.pyrocufflink.blue git0.pyrocufflink.blue [grafana] +mtrcs0.pyrocufflink.red stats0.pyrocufflink.blue [graylog] @@ -78,6 +88,9 @@ k8s-amd64-n2.pyrocufflink.blue k8s-controller k8s-node +[metricspi] +mtrcs0.pyrocufflink.red + [motioneye] [named-server:children] @@ -156,6 +169,15 @@ matrix0.pyrocufflink.blue [victoria-metrics] +[victoria-metrics:children] +metricspi + +[vmagent:children] +victoria-metrics + +[vmalert:children] +metricspi + [vm-hosts] [wheelhost] diff --git a/metricspi.yml b/metricspi.yml new file mode 100644 index 0000000..45bcbcc --- /dev/null +++ b/metricspi.yml @@ -0,0 +1,16 @@ +- hosts: metricspi + roles: + - role: trustca + ca: dch-root-ca + # The metricspi OS does not include tools for managing the system + # CA trust store. Fortunately, the only application that needs to + # use a custom CA is Grafana, which supports using an alternate CA + # certificate path. 
+ ca_store_dir: /etc/ssl/certs + ca_update_cmd: 'true' + tags: + - trustca + +- import_playbook: victoria-metrics.yml +- import_playbook: alertmanager.yml +- import_playbook: grafana.yml diff --git a/roles/ssh-hostkeys/files/ssh_known_hosts b/roles/ssh-hostkeys/files/ssh_known_hosts index 43999af..59faee3 100644 --- a/roles/ssh-hostkeys/files/ssh_known_hosts +++ b/roles/ssh-hostkeys/files/ssh_known_hosts @@ -85,6 +85,9 @@ matrix0.pyrocufflink.blue ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDygGMJH2ke6RQlyt motion0.pyrocufflink.blue ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBHhqW6JACT+jLFxoZxPHH2bYYVGpoxdzVNQv9zNWQxKIX4ScG+f4PXWdae7s+7lkceYVWJDRfTBN+lHxxUNPRI0= motion0.pyrocufflink.blue ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJiEe7HZmVnndPg6rxTpaD6BtgwK6Fa7QeLO2mGJK3bY motion0.pyrocufflink.blue ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC1hBM8eiUiNQGNPbgl/2V26xTVbvEM8I8VU1JzWfs3uESgBLF8J387jdqwlTL8XyZtR9J/MqXl3rM8e04dy6ZM3Kyp57VrRpBc6AEs3Dl/Y8NsFHW3E0oIfiVyNYBnsiNv8dZsVLrDaMirZdyMIpY5B3gWLcVgfHl+uzIzl/LabPYs5p3OkxFOoPG+zhUmlPE8g0Dw/WV7wT4lodhmHx24f3yyzXBXgP/XUB9CSXwlGOnjzv+SfxqFg8G2s1bEcfMLpPuXO9a0ltq4vPu4Y5oFX9vc2nN20S594O4kBWG1D+nIvnZN6zfxs/ivrHEtzAJLRTSX5zxloCNSYX0f+X4/KnaqaaZylVzFSX/riZKVrzc5TdhjOuqFwJo5yqsefWh1MkLVxjGf10ww2FBseXLakKmvLH2oooMYNCs5FdrftGrkYQh5Czj4e8UUnSttxzGNRYwrnxcQNmL9YFdmsS10ck9zGzCg/ZJrNqkGrQhZ94U/vRreiZCM2fHVQRSaku0= +mtrcs0.pyrocufflink.red ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNkyo1kZS9YyBlnzPIwp10Eux+XfpDvoxLDFvpCD3BQFmI5iATlPqRrB4St2TcRcCvKhKOhu1m9GXu/3WOhWHN4= +mtrcs0.pyrocufflink.red ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPWa6B4frT+91qyKtBKCJ+QtKHeFl2Bubl0FE06vPwul +mtrcs0.pyrocufflink.red ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQC3uFV2/39EHZ+RlCSgMmt1/exVpny1DJEjZSoB0Ewo1KMoT3sI2FZieWlRKFn4eF4LkzIGfq8nwo6YuaHqlTP7ooSb3Y9+J4BMWgWOMPQ7J8HjMnnwMwloSr85anIBOug/bb0l3IZzCmQTvdeCw7XrCaYDYCPPYUj9rCv2cLwrNzPo/k0Ehey+4/j20l/9p8nAaKmEBO1XHxjPfTUwsBnuZ9wT5J3CoGI9Im5vNB2Xw36T7KiGEu8O/Zg9D1e/2vVXMSPvwl8tBrQLgZFP457TlFmgWYgOUYtV/WLeANZzoiKJKy6304svzxicmwJoymljr5hetDuxkpm41oWhM/nCddpR+2YZB8JFSUuoPBXPN3/YY5XExRP41t0PRaZYwoKc61BYM4kck2QhtEkQ4AzN4NYZ7s5g1Q9Bp+lkS9SS6SpUJ44xgGKYuvGvkUP6BsHhvdn3HZXYXcY6C/ghWdvgbAtB3/a8fbFlswUwA7GFGsWIV3y9Qv6JufgLZJagPJM= nvr0.pyrocufflink.blue ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBM3oCuxPRlvzcv9W54h+w6ff+UemoEDeVvXHRpDuAKU1467psZ2JleHPMMx4RffaAswmneIMYK756pR5i3S3Zdc= nvr0.pyrocufflink.blue ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJ7wRCZq1auppZJBU5xtIjbG93Gsqvuvk4/7dzj86qKK nvr0.pyrocufflink.blue ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDD7TPR55x0kN+jn1SJG18ryMV9C8DdSmUXszupQb/90YP43CS7/p7Ie4jRAFy1iTEUgKsIy0Ir9i7zDliMv6taVSP93+SPQSlej0YUjkGMcfL9y6T06lwGdlyXak5mdKrSMo0ACbd4hPs8NcMKlm9p3imI9WOWCG7wHHFfm/mh7QG589EK+cv2l4uwcjQnuQeJuOkmWGVXVrhu79FlPI6a/sbiOPeC0xXw7yE2D2WN8LYiGZxKHY/3gQNsJczZy97Mn0Yh2oyK+4eHRDUNbFtlQWx6vYwfe0xyKpKBvxNSI3l8c+lg6InVFo9X8EXXUYdol7udphw75+XIE1ALp7ClumZMCMTgr3JowUCPJcRC6MixvgQ1pzZ9p/4q92Rmd+UtfzvAt1fIm6QS7maMrCjynwZx5jz1mij2QFRprm1TCf4Hx1xYkzWqMHNgAv/VEV96opb0ford5tHLSqJWDaB4OGUcaSXxdkQHpqmDhfpxe+iOF2LmQt42LavGqpkqFl8=