Skip to content

Commit

Permalink
feat: provision alerts (#12561)
Browse files Browse the repository at this point in the history
This PR updates the in-repo dashboards to the latest version and adds
alerting rules.

A new secret is added to Gcloud to hold the webhook URL of the Slack
channel that we want alerts to be sent to.

One thing that needs to be kept in mind when editing the rules is that
Grafana templates need to be escaped, otherwise Helm will try to execute
them and fail (use `` {{ ` escaped content {{ $some_grafana_var }} `
}}``)
  • Loading branch information
alexghr authored Mar 7, 2025
1 parent 7a41843 commit 2ea1767
Show file tree
Hide file tree
Showing 13 changed files with 2,075 additions and 3,514 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/metrics-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ on:
required: true
type: string
default: "grafana-dashboard-password"
slack_webhook_url_secret_name:
description: The name of the secret which holds the Slack webhook URL
required: true
type: string
default: "slack-webhook-url"
secrets:
GCP_SA_KEY:
required: true
Expand Down Expand Up @@ -70,6 +75,10 @@ on:
description: The name of the secret which holds the Grafana dashboard password
required: true
default: "grafana-dashboard-password"
slack_webhook_url_secret_name:
description: The name of the secret which holds the Slack webhook URL
required: true
default: "slack-webhook-url"

jobs:
metrics_deployment:
Expand All @@ -89,6 +98,7 @@ jobs:
TF_STATE_BUCKET: aztec-terraform
GKE_CLUSTER_CONTEXT: "gke_testnet-440309_us-west1-a_${{ inputs.cluster }}"
GRAFANA_DASHBOARD_PASSWORD_SECRET_NAME: ${{ inputs.grafana_dashboard_password_secret_name }}
# Must reference the input exactly as declared under `on:` (slack_webhook_url_secret_name);
# an unknown input name evaluates to an empty string instead of raising an error.
SLACK_WEBHOOK_URL_SECRET_NAME: ${{ inputs.slack_webhook_url_secret_name }}

steps:
- name: Checkout code
Expand Down Expand Up @@ -118,6 +128,12 @@ jobs:
echo "::add-mask::$(gcloud secrets versions access latest --secret=${{ env.GRAFANA_DASHBOARD_PASSWORD_SECRET_NAME }})"
echo "grafana_dashboard_password=$(gcloud secrets versions access latest --secret=${{ env.GRAFANA_DASHBOARD_PASSWORD_SECRET_NAME }})" >> "$GITHUB_OUTPUT"
- name: Grab the Slack webhook URL
  id: get-slack-webhook-url
  run: |
    # Read the secret exactly once so the masked value and the exported value
    # can never differ (e.g. if the secret is rotated between two reads).
    # A plain assignment also lets a gcloud failure stop the step instead of
    # silently producing an empty URL.
    slack_webhook_url="$(gcloud secrets versions access latest --secret="$SLACK_WEBHOOK_URL_SECRET_NAME")"
    # Register the mask before the value is written anywhere else.
    echo "::add-mask::$slack_webhook_url"
    echo "slack_webhook_url=$slack_webhook_url" >> "$GITHUB_OUTPUT"
- name: Ensure Terraform state bucket exists
run: |
if ! gsutil ls gs://${{ env.TF_STATE_BUCKET }} >/dev/null 2>&1; then
Expand Down Expand Up @@ -155,6 +171,7 @@ jobs:
-var="VALUES_FILE=${{ env.VALUES_FILE }}" \
-var="GKE_CLUSTER_CONTEXT=${{ env.GKE_CLUSTER_CONTEXT }}" \
-var="GRAFANA_DASHBOARD_PASSWORD=${{ steps.get-grafana-dashboard-password.outputs.grafana_dashboard_password }}" \
-var="SLACK_WEBHOOK_URL=${{ steps.get-slack-webhook-url.outputs.slack_webhook_url }}" \
-lock=${{ inputs.respect_tf_lock }}
- name: Terraform Plan
Expand All @@ -165,6 +182,7 @@ jobs:
-var="VALUES_FILE=${{ env.VALUES_FILE }}" \
-var="GKE_CLUSTER_CONTEXT=${{ env.GKE_CLUSTER_CONTEXT }}" \
-var="GRAFANA_DASHBOARD_PASSWORD=${{ steps.get-grafana-dashboard-password.outputs.grafana_dashboard_password }}" \
-var="SLACK_WEBHOOK_URL=${{ steps.get-slack-webhook-url.outputs.slack_webhook_url }}" \
-out=tfplan \
-lock=${{ inputs.respect_tf_lock }}
Expand Down
26 changes: 22 additions & 4 deletions spartan/metrics/copy-dashboard.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,26 @@ cd "$(dirname "${BASH_SOURCE[0]}")"

cp values.tmp.yaml values.yaml

for dashboard in ./grafana_dashboards/*.json; do
dashboard_name=$(basename "$dashboard" .json)
export DASHBOARD_JSON=$(jq -c '.' "$dashboard")
yq -i ".grafana.dashboards.default.\"$dashboard_name\".json = strenv(DASHBOARD_JSON)" values.yaml
# Embed every dashboard JSON file into values.yaml under
# .grafana.dashboards.<folder>.<dashboard>.json
for dashboard_file in ./grafana/dashboards/*.json; do
  # When nothing matches, the glob is left as a literal string; skip it
  # rather than handing jq a path that does not exist.
  [[ -e "$dashboard_file" ]] || continue

  full_filename=$(basename "$dashboard_file" .json)

  # File names follow <folder>_<dashboard>.json; the first underscore is the
  # separator, so the dashboard part may itself contain underscores.
  if [[ "$full_filename" == *"_"* ]]; then
    folder_name=${full_filename%%_*}
    dashboard_name=${full_filename#*_}
  else
    # No underscore: file the dashboard under the "default" folder.
    folder_name="default"
    dashboard_name=$full_filename
  fi

  # Assign first and export second so a jq failure is not hidden behind
  # the exit status of `export`.
  dashboard_content=$(jq -c '.' "$dashboard_file")
  export dashboard_content

  # Quote both path segments so names containing '-', '.' or spaces are
  # still treated as a single key by yq.
  yq -i ".grafana.dashboards.\"${folder_name}\".\"${dashboard_name}\".json = strenv(dashboard_content)" values.yaml
done

# Embed every alerting provisioning file into values.yaml under
# .grafana.alerting."<file name>.yaml"
for file in ./grafana/alerts/*.yaml; do
  # When nothing matches, the glob is left as a literal string; skip it.
  [[ -e "$file" ]] || continue

  file_name=$(basename "$file" .yaml)

  # Assign first and export second so a read failure is not hidden behind
  # the exit status of `export`.
  file_content=$(<"$file")
  export file_content

  # env() parses the variable as YAML (strenv() would insert it as one
  # string), so the file is stored as a nested structure.
  yq -i ".grafana.alerting.\"${file_name}.yaml\" = env(file_content)" values.yaml
done
11 changes: 11 additions & 0 deletions spartan/metrics/grafana/alerts/contactpoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Contact points provisioned for organisation 1.
apiVersion: 1
contactPoints:
  - orgId: 1
    # The notification policy refers to this contact point by this exact name.
    name: "Slack #network-alerts channel"
    receivers:
      - uid: "deexubp9hzpc1b"
        type: "slack"
        settings:
          # NOTE(review): this looks like an environment-variable placeholder
          # filled in at provisioning time; confirm the variable is set for Grafana.
          url: "$SLACK_WEBHOOK_URL"
        disableResolveMessage: false

11 changes: 11 additions & 0 deletions spartan/metrics/grafana/alerts/policies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Notification policy provisioned for organisation 1.
apiVersion: 1
policies:
  - orgId: 1
    # Contact point that receives the matched alerts, referenced by name.
    receiver: 'Slack #network-alerts channel'
    # Each matcher is a [label, operator, value] triple.
    # NOTE(review): the value looks like an environment-variable placeholder;
    # confirm it is defined for Grafana.
    object_matchers:
      - [k8s_namespace_name, '=~', '$PRODUCTION_NAMESPACES_REGEX']
    group_by: [grafana_folder, alertname]
Loading

0 comments on commit 2ea1767

Please sign in to comment.