~sircmpwn/metrics.sr.ht

72b878212bd00d8b73ff831877df8160d02eadfa — Drew DeVault a month ago 0cb85e8
*_rules.yml: s/alarming/urgent/

This conveys the intention a bit better.
2 files changed, 5 insertions(+), 5 deletions(-)

M meta_rules.yml
M node_rules.yml
M meta_rules.yml => meta_rules.yml +1 -1
@@ -12,7 +12,7 @@ groups:
   - alert: High rate of password resets
     expr: delta(meta_pw_resets_total[10m]) > 5
     labels:
-      severity: alarming
+      severity: urgent
       security: true
     annotations:
       summary: "Unusual number of failed logins"

M node_rules.yml => node_rules.yml +4 -4
@@ -6,7 +6,7 @@ groups:
     expr: up == 0
     for: 5m
     labels:
-      severity: alarming
+      severity: urgent
     annotations:
       summary: "Instance {{ $labels.instance }} is down"
   - alert: Instance rebooted


@@ -18,7 +18,7 @@ groups:
   - alert: Read-only filesystem
     expr: node_filesystem_readonly{mountpoint="/|/var"}
     labels:
-      severity: alarming
+      severity: urgent
     annotations:
       summary: "Instance {{ $labels.instance }} read-only filesystem on {{ $labels.mountpoint }}"
   - alert: High disk usage


@@ -39,7 +39,7 @@ groups:
       > 0.80
     for: 5m
     labels:
-      severity: alarming
+      severity: urgent
     annotations:
       summary: "Instance {{ $labels.instance }} has tmpfs usage"
   - alert: High CPU usage


@@ -63,7 +63,7 @@ groups:
     << : &prolonged
        for: 60m
        labels:
-         severity: alarming
+         severity: urgent
     annotations:
       summary: "Instance {{ $labels.instance }} is under sustained high CPU usage"
   - alert: High network activity