Skip to content

Commit 10deab9

Browse files
authored
feat(o11y): adding an alert for the RPC providers availability (#948)
1 parent 9860d2a commit 10deab9

File tree

1 file changed

+39
-3
lines changed

1 file changed

+39
-3
lines changed

terraform/monitoring/panels/usage/provider.libsonnet

+39 -3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
22
local defaults = import '../../grafonnet-lib/defaults.libsonnet';
33

4-
local panels = grafana.panels;
5-
local targets = grafana.targets;
4+
local panels = grafana.panels;
5+
local targets = grafana.targets;
6+
local alert = grafana.alert;
7+
local alertCondition = grafana.alertCondition;
68

79
{
810
new(ds, vars, provider)::
@@ -14,7 +16,41 @@ local targets = grafana.targets;
1416

1517
.addTarget(targets.prometheus(
1618
datasource = ds.prometheus,
17-
expr = 'sum by(chain_id) (increase(provider_status_code_counter_total{provider="%s"}[5m]))' % provider,
19+
expr = 'sum by(chain_id) (increase(provider_status_code_counter_total{provider="%s"}[$__rate_interval]))' % provider,
1820
legendFormat = '__auto',
1921
))
22+
23+
// Hidden target for the provider availability alert
24+
25+
.addTarget(targets.prometheus(
26+
datasource = ds.prometheus,
27+
expr = '(sum(increase(provider_status_code_counter_total{provider="%s", status_code="200"}[$__rate_interval])) / sum(increase(provider_status_code_counter_total{provider="%s"}[$__rate_interval]))) * 100' % [provider, provider],
28+
legendFormat = '__auto',
29+
exemplar = false,
30+
refId = 'providerAvailabilityPercent',
31+
hide = true,
32+
))
33+
34+
.setAlert(vars.environment, alert.new(
35+
namespace = 'Blockchain API',
36+
name = "%s - Provider availability drop" % vars.environment,
37+
message = "%s - Provider availability drop" % vars.environment,
38+
period = '5m',
39+
frequency = '1m',
40+
noDataState = 'no_data',
41+
notifications = vars.notifications,
42+
alertRuleTags = {
43+
'og_priority': 'P3',
44+
},
45+
conditions = [
46+
alertCondition.new(
47+
evaluatorParams = [ 90 ],
48+
evaluatorType = 'lt',
49+
operatorType = 'or',
50+
queryRefId = 'providerAvailabilityPercent',
51+
queryTimeStart = '5m',
52+
reducerType = 'avg',
53+
),
54+
]
55+
))
2056
}

0 commit comments

Comments
 (0)