Skip to content

Commit 962a7a2

Browse files
authored
chore: average alerts across namespace for 1 hour (#10827)
We now require the *average* increase in proven chain height across a namespace to be 0 for an *hour* before a Slack alert is triggered.
1 parent 4ac13e6 commit 962a7a2

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

spartan/metrics/terraform/grafana.tf

+6-6
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,11 @@ resource "grafana_mute_timing" "mute_timing_always" {
5959
}
6060
}
6161

62-
resource "grafana_rule_group" "rule_group_minutely" {
62+
resource "grafana_rule_group" "rule_group_hourly" {
6363
org_id = 1
64-
name = "minutely-evaluation-group"
64+
name = "hourly-evaluation-group"
6565
folder_uid = grafana_folder.rule_folder.uid
66-
interval_seconds = 60
66+
interval_seconds = 3600
6767

6868
rule {
6969
name = "Proven Chain is Live"
@@ -81,7 +81,7 @@ resource "grafana_rule_group" "rule_group_minutely" {
8181
model = jsonencode({
8282
disableTextWrap = false,
8383
editorMode = "code",
84-
expr = "increase(aztec_archiver_block_height{aztec_status=\"proven\"}[30m])",
84+
expr = "avg by(k8s_namespace_name) (increase(aztec_archiver_block_height{aztec_status=\"proven\"}[60m]))",
8585
fullMetaSearch = false,
8686
includeNullMetadata = true,
8787
instant = true,
@@ -118,15 +118,15 @@ resource "grafana_rule_group" "rule_group_minutely" {
118118
expression = "A",
119119
intervalMs = 1000,
120120
maxDataPoints = 43200,
121-
refId = "C",
121+
refId = "B",
122122
type = "threshold"
123123
}
124124
)
125125
}
126126

127127
no_data_state = "NoData"
128128
exec_err_state = "Error"
129-
for = "1m"
129+
for = "1h"
130130
annotations = {}
131131
labels = {}
132132
is_paused = false

0 commit comments

Comments
 (0)