{ pkgs, lib, config, ... }: let ircChannel = "#sterni.lv"; irccatPort = builtins.replaceStrings [ ":" ] [ "" ] config.services.depot.irccat.config.tcp.listen; send-irc-msg = pkgs.writeShellScript "send-irc-msg" '' set -euo pipefail printf '%s %s\n' ${lib.escapeShellArg ircChannel} "$1" | \ ${lib.getBin pkgs.netcat-openbsd}/bin/nc -N localhost ${irccatPort} ''; netdataPort = 19999; in { imports = [ ./irccat.nix ]; config = { services.depot.irccat.config.irc.channels = [ ircChannel ]; # Since we have irccat we can wire up mdadm --monitor boot.swraid.mdadmConf = '' PROGRAM ${ pkgs.writeShellScript "mdmonitor-to-irc" '' ${send-irc-msg} "mdmonitor: $1($2''${3:+, $3})" '' } ''; # TODO(sterni): irc notifications (?) services = { smartd = { enable = true; autodetect = true; # Short self test every day 03:00 # Long self test every tuesday 05:00 defaults.autodetected = "-a -o on -s (S/../.././03|L/../../2/05)"; extraOptions = [ "-A" "/var/log/smartd/" ]; }; netdata = { enable = true; config = { logs = { access = "syslog"; error = "syslog"; debug = "syslog"; health = "syslog"; collector = "syslog"; }; web = { "default port" = toString netdataPort; "bind to" = "localhost:${toString netdataPort}"; }; health = { "script to execute on alarm" = pkgs.writeShellScript "simple-alarm-notify" '' set -euo pipefail # This humongous list is copied over from netdata's alarm-notify.sh roles="''${1}" # the roles that should be notified for this event args_host="''${2}" # the host generated this event unique_id="''${3}" # the unique id of this event alarm_id="''${4}" # the unique id of the alarm that generated this event event_id="''${5}" # the incremental id of the event, for this alarm id when="''${6}" # the timestamp this event occurred name="''${7}" # the name of the alarm, as given in netdata health.d entries chart="''${8}" # the name of the chart (type.id) status="''${9}" # the current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL old_status="''${10}" # the previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL value="''${11}" # the current value of the alarm old_value="''${12}" # the previous value of the alarm src="''${13}" # the line number and file the alarm has been configured duration="''${14}" # the duration in seconds of the previous alarm state non_clear_duration="''${15}" # the total duration in seconds this is/was non-clear units="''${16}" # the units of the value info="''${17}" # a short description of the alarm value_string="''${18}" # friendly value (with units) # shellcheck disable=SC2034 # variable is unused, but https://github.com/netdata/netdata/pull/5164#discussion_r255572947 old_value_string="''${19}" # friendly old value (with units), previously named "old_value_string" calc_expression="''${20}" # contains the expression that was evaluated to trigger the alarm calc_param_values="''${21}" # the values of the parameters in the expression, at the time of the evaluation total_warnings="''${22}" # Total number of alarms in WARNING state total_critical="''${23}" # Total number of alarms in CRITICAL state total_warn_alarms="''${24}" # List of alarms in warning state total_crit_alarms="''${25}" # List of alarms in critical state classification="''${26}" # The class field from .conf files edit_command_line="''${27}" # The command to edit the alarm, with the line number child_machine_guid="''${28}" # the machine_guid of the child transition_id="''${29}" # the transition_id of the alert summary="''${30}" # the summary text field of the alert # Verify that they haven't extended the arg list ARG_COUNT_EXPECTED=30 if [[ "$#" != "$ARG_COUNT_EXPECTED" ]]; then echo "$0: WARNING: unexpected number of arguments: $#. Did netdata add more?" >&2 fi MSG="netdata: $status ''${name//_/ } ($chart): ''${summary//_/ } = $value_string" # Filter rules by chart name. This is necessary, since the "enabled alarms" # filter only allows for filtering alarm types, not specific alarms # belonging to that alarm. case "$chart" in # netdata prefers the automatically assigned names (dm-, md, # sd) over ids for alerts, so this configuration assumes that # we have two physical disks which we kind of assert using the # grub configuration (it is more difficult with the soft raid # config). # ${assert builtins.length config.boot.loader.grub.devices == 2; ""} disk_util.sda | disk_util.sdb | disk_backlog.sda | disk_backlog.sdb) ;; disk_util.* | disk_backlog.*) echo "$0: INFO: DISCARDING message: $MSG" >&2 exit 0 ;; *) ;; esac echo "$0: INFO: sending message: $MSG" >&2 ${send-irc-msg} "$MSG" ''; }; }; }; }; }; }