Skip to content

Commit a66c545

Browse files
authored
feat: slo freshness dashboards (#155)
1 parent 61cb194 commit a66c545

10 files changed

+402
-56
lines changed

services/setdashboards/meth_instancedeployment_deploymonitoringdashboard.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ func (instanceDeployment *InstanceDeployment) deployMonitoringDashboard() (err e
2020
dashboardDeployment := mon.NewDashboardDeployment()
2121
dashboardDeployment.Core = instanceDeployment.Core
2222
dashboardDeployment.Settings.Instance.MON = instanceDeployment.Settings.Instance.MON
23-
dashboardDeployment.Artifacts.Widgets = instanceDeployment.Artifacts.Widgets
23+
dashboardDeployment.Artifacts = instanceDeployment.Artifacts
2424
return dashboardDeployment.Deploy()
2525
}

services/setdashboards/meth_instancedeployment_situate.go

+46-6
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,60 @@
1515
package setdashboards
1616

1717
import (
18+
"encoding/json"
19+
"fmt"
20+
"math"
21+
"strings"
22+
1823
"github.com/BrunoReboul/ram/utilities/mon"
1924
"google.golang.org/api/monitoring/v1"
2025
)
2126

2227
// Situate complements settings, taking into account the situation for service and instance settings
2328
func (instanceDeployment *InstanceDeployment) Situate() (err error) {
2429
instanceDeployment.Artifacts.Widgets = []*monitoring.Widget{}
25-
for _, microserviceName := range instanceDeployment.Settings.Instance.MON.MicroServiceNameList {
26-
for _, widgetType := range instanceDeployment.Settings.Instance.MON.WidgetTypeList {
27-
widget, err := mon.GetGCFWidget(microserviceName, widgetType)
28-
if err != nil {
29-
return err
30+
instanceDeployment.Artifacts.Tiles = []*monitoring.Tile{}
31+
if instanceDeployment.Settings.Instance.MON.GridLayout.Columns != 0 {
32+
for _, microserviceName := range instanceDeployment.Settings.Instance.MON.GridLayout.MicroServiceNameList {
33+
for _, widgetType := range instanceDeployment.Settings.Instance.MON.GridLayout.WidgetTypeList {
34+
widget, err := mon.GetGCFWidget(microserviceName, widgetType)
35+
if err != nil {
36+
return err
37+
}
38+
instanceDeployment.Artifacts.Widgets = append(instanceDeployment.Artifacts.Widgets, &widget)
3039
}
31-
instanceDeployment.Artifacts.Widgets = append(instanceDeployment.Artifacts.Widgets, &widget)
40+
}
41+
}
42+
if instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.SLO != 0 {
43+
grouwthFactor := math.Sqrt(2)
44+
scale := 0.01
45+
thresholdSeconds := scale * math.Pow(grouwthFactor, float64(instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.CutOffBucketNumber))
46+
var thresholdText string
47+
if thresholdSeconds < 60 {
48+
thresholdText = fmt.Sprintf("%g seconds", math.Round(thresholdSeconds))
49+
} else {
50+
if thresholdSeconds < 60*60 {
51+
thresholdText = fmt.Sprintf("%g minutes", math.Round(thresholdSeconds/60))
52+
} else {
53+
if thresholdSeconds < 60*60*60 {
54+
thresholdText = fmt.Sprintf("%g hours", math.Round(thresholdSeconds/60/60))
55+
}
56+
}
57+
}
58+
slo := instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.SLO
59+
sloText := fmt.Sprintf("%g%%", slo*100)
60+
dashboardJSON := mon.SLOFreshnessTiles
61+
dashboardJSON = strings.Replace(dashboardJSON, "<origin>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Origin, -1)
62+
dashboardJSON = strings.Replace(dashboardJSON, "<scope>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Scope, -1)
63+
dashboardJSON = strings.Replace(dashboardJSON, "<flow>", instanceDeployment.Settings.Instance.MON.SLOFreshnessLayout.Flow, -1)
64+
dashboardJSON = strings.Replace(dashboardJSON, "<slo>", fmt.Sprintf("%g", slo), -1)
65+
dashboardJSON = strings.Replace(dashboardJSON, "<lowerBound>", fmt.Sprintf("%g", math.Floor(slo*10)/10), -1)
66+
dashboardJSON = strings.Replace(dashboardJSON, "<thresholdSeconds>", fmt.Sprintf("%v", thresholdSeconds), -1)
67+
dashboardJSON = strings.Replace(dashboardJSON, "<thresholdText>", thresholdText, -1)
68+
dashboardJSON = strings.Replace(dashboardJSON, "<sloText>", sloText, -1)
69+
err = json.Unmarshal([]byte(dashboardJSON), &instanceDeployment.Artifacts.Tiles)
70+
if err != nil {
71+
return fmt.Errorf("json.Unmarshal SLOFreshnessTiles %v", err)
3272
}
3373
}
3474
return nil

services/setdashboards/type_instancedeployment.go

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type InstanceDeployment struct {
3131
DumpTimestamp time.Time `yaml:"dumpTimestamp"`
3232
Artifacts struct {
3333
Widgets []*monitoring.Widget
34+
Tiles []*monitoring.Tile
3435
}
3536
Core *deploy.Core
3637
Settings struct {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
// Copyright 2020 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the 'License');
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an 'AS IS' BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package mon
16+
17+
// SLOFreshnessTiles is the JSON template of the freshness SLO dashboard: a
// JSON array of eight dashboard tiles (height / width / xPos / yPos / widget).
//
// The template is not valid JSON until its placeholders are substituted:
//   - <lowerBound> and <slo> appear as bare JSON numbers (gauge lower bound
//     and RED threshold of the "SLI vs SLO" scorecard);
//   - <origin> and <thresholdSeconds> appear inside the monitoring queries
//     (origin filter and latency cut-off passed to fraction_less_than_from);
//   - <scope>, <flow>, <sloText> and <thresholdText> appear in the title and
//     markdown text of the first tile.
//
// The three burn rate charts (7d, 12h and 1h sliding windows) share the same
// query and differ only by the sliding window and the threshold line value.
//
// NOTE(review): the error budget queries divide by a hard-coded 0.01 while the
// SLO itself is injected through <slo>; this presumably assumes a 99% SLO and
// should be confirmed or parameterized together with the code that fills the
// template.
const SLOFreshnessTiles = `
[
    {
        "height": 2,
        "width": 4,
        "widget": {
            "title": "<scope> <flow> <sloText> < <thresholdText>",
            "text": {
                "content": "**Freshness**: <sloText> of <scope> configurations from <flow> flow over the last 28 days should be analyzed in less than <thresholdText>.",
                "format": "MARKDOWN"
            }
        }
    },
    {
        "height": 2,
        "width": 3,
        "xPos": 4,
        "widget": {
            "title": "SLI vs SLO",
            "scorecard": {
                "gaugeView": {
                    "lowerBound": <lowerBound>,
                    "upperBound": 1.0
                },
                "thresholds": [
                    {
                        "color": "RED",
                        "direction": "BELOW",
                        "value": <slo>
                    }
                ],
                "timeSeriesQuery": {
                    "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| fraction_less_than_from <thresholdSeconds>"
                }
            }
        }
    },
    {
        "height": 2,
        "width": 3,
        "xPos": 7,
        "widget": {
            "title": "Remaining ERROR BUDGET",
            "scorecard": {
                "thresholds": [
                    {
                        "color": "YELLOW",
                        "direction": "BELOW",
                        "value": 0.1
                    }
                ],
                "timeSeriesQuery": {
                    "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| neg\n| add 1"
                }
            }
        }
    },
    {
        "height": 2,
        "width": 2,
        "xPos": 10,
        "widget": {
            "title": "Configurations analyzed in 28 days",
            "scorecard": {
                "sparkChartView": {
                    "sparkChartType": "SPARK_LINE"
                },
                "timeSeriesQuery": {
                    "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| count_from"
                }
            }
        }
    },
    {
        "height": 9,
        "width": 3,
        "xPos": 9,
        "yPos": 2,
        "widget": {
            "title": "Last 28days heatmap",
            "xyChart": {
                "chartOptions": {
                    "mode": "COLOR"
                },
                "dataSets": [
                    {
                        "plotType": "HEATMAP",
                        "timeSeriesQuery": {
                            "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter (metric.microservice_name == 'stream2bq')\n| filter metric.origin == '<origin>'\n| align delta(28d)\n| every 28d\n| within 28d\n| group_by [metric.microservice_name]\n| graph_period 28d"
                        }
                    }
                ],
                "timeshiftDuration": "0s",
                "yAxis": {
                    "label": "y1Axis",
                    "scale": "LINEAR"
                }
            }
        }
    },
    {
        "height": 3,
        "width": 9,
        "yPos": 2,
        "widget": {
            "title": "Error budget burnrate on 7d sliding windows - Email when > 1.5",
            "xyChart": {
                "chartOptions": {
                    "mode": "COLOR"
                },
                "dataSets": [
                    {
                        "plotType": "LINE",
                        "timeSeriesQuery": {
                            "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n|filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(7d)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
                        }
                    }
                ],
                "thresholds": [
                    {
                        "value": 1.5
                    }
                ],
                "timeshiftDuration": "0s",
                "yAxis": {
                    "label": "y1Axis",
                    "scale": "LINEAR"
                }
            }
        }
    },
    {
        "height": 3,
        "width": 9,
        "yPos": 5,
        "widget": {
            "title": "Error budget burnrate on 12h sliding windows - Alert when > 3",
            "xyChart": {
                "chartOptions": {
                    "mode": "COLOR"
                },
                "dataSets": [
                    {
                        "plotType": "LINE",
                        "timeSeriesQuery": {
                            "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n|filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(12h)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
                        }
                    }
                ],
                "thresholds": [
                    {
                        "value": 3.0
                    }
                ],
                "timeshiftDuration": "0s",
                "yAxis": {
                    "label": "y1Axis",
                    "scale": "LINEAR"
                }
            }
        }
    },
    {
        "height": 3,
        "width": 9,
        "yPos": 8,
        "widget": {
            "title": "Error budget burnrate on 1h sliding windows - Alert when > 9",
            "xyChart": {
                "chartOptions": {
                    "mode": "COLOR"
                },
                "dataSets": [
                    {
                        "plotType": "LINE",
                        "timeSeriesQuery": {
                            "timeSeriesQueryLanguage": "fetch cloud_function::logging.googleapis.com/user/ram_latency_e2e\n| filter metric.microservice_name == 'stream2bq'\n| filter metric.origin == '<origin>'\n| align delta(1m)\n| every 1m\n| group_by [metric.microservice_name], sliding(1h)\n| fraction_less_than_from <thresholdSeconds>\n| neg\n| add 1\n| div 0.01\n| cast_units \"1\""
                        }
                    }
                ],
                "thresholds": [
                    {
                        "value": 9.0
                    }
                ],
                "timeshiftDuration": "0s",
                "yAxis": {
                    "label": "y1Axis",
                    "scale": "LINEAR"
                }
            }
        }
    }
]
`

0 commit comments

Comments
 (0)