Skip to content

Commit c6b65ab

Browse files
authored
feat: gating test (#9918)
The main feature of this branch (contrary to what the branch name suggests) is `gating-passive.test.ts`. This test:
- updates the aztec network deployment, allowing validators to use each other as boot nodes
- applies the "network-requirements" network shaping
- permanently disables the boot node
- runs 3 epochs during which it:
  - kills 25% of the validators
  - asserts that we miss fewer than 50% of slots

Other work in this branch includes:
- add `ignoreDroppedReceiptsFor` TX wait options
  - this allows sending a TX to one node and awaiting it on another, since we need time for p2p propagation
  - we need this since we have shifted the PXE to point at the top-level validator service, which load balances across individual validators
  - this may help with #9613
- scalable loki deployment for prod
- more visible logging for core sequencer operations
- better error handling during the setup of l2 contracts
- better error handling in the pxe
- rename the network shaping charts to "aztec-chaos-scenarios"

Fix #9713
Fix #9883
1 parent e740d42 commit c6b65ab

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+769
-239
lines changed

.github/workflows/ci.yml

+4
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,10 @@ jobs:
715715
values: 16-validators
716716
runner_type: 16core-tester-x86
717717
timeout: 60
718+
- test: gating-passive.test.ts
719+
values: 16-validators
720+
runner_type: 16core-tester-x86
721+
timeout: 60
718722
steps:
719723
- uses: actions/checkout@v4
720724
with: { ref: "${{ env.GIT_COMMIT }}" }
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: v2
2+
name: aztec-chaos-scenarios
3+
description: Chaos scenarios for spartan using chaos-mesh
4+
type: application
5+
version: 0.1.0
6+
appVersion: "1.0.0"

spartan/network-shaping/templates/_helpers.tpl spartan/aztec-chaos-scenarios/templates/_helpers.tpl

+3-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{{/*
22
Create a default fully qualified app name.
33
*/}}
4-
{{- define "network-shaping.fullname" -}}
4+
{{- define "aztec-chaos-scenarios.fullname" -}}
55
{{- if .Values.fullnameOverride }}
66
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
77
{{- else }}
@@ -14,17 +14,12 @@ Create a default fully qualified app name.
1414
{{- end }}
1515
{{- end }}
1616

17-
{{/*
18-
Selector labels
19-
*/}}
20-
{{- define "chaos-mesh.selectorLabels" -}}
21-
{{- end }}
2217

2318
{{/*
2419
Common labels
2520
*/}}
26-
{{- define "network-shaping.labels" -}}
27-
app.kubernetes.io/name: {{ include "network-shaping.fullname" . }}
21+
{{- define "aztec-chaos-scenarios.labels" -}}
22+
app.kubernetes.io/name: {{ include "aztec-chaos-scenarios.fullname" . }}
2823
app.kubernetes.io/instance: {{ .Release.Name }}
2924
{{- if .Chart.AppVersion }}
3025
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{{- if .Values.bootNodeFailure.enabled }}
2+
---
3+
apiVersion: chaos-mesh.org/v1alpha1
4+
kind: PodChaos
5+
metadata:
6+
name: {{ .Values.global.targetNamespace }}-boot-node-failure
7+
namespace: {{ .Values.global.chaosMeshNamespace }}
8+
labels:
9+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
10+
annotations:
11+
"helm.sh/resource-policy": keep
12+
spec:
13+
action: pod-failure
14+
mode: all
15+
selector:
16+
namespaces:
17+
- {{ .Values.global.targetNamespace }}
18+
labelSelectors:
19+
app: boot-node
20+
duration: {{ .Values.bootNodeFailure.duration }}
21+
{{- end }}

spartan/network-shaping/templates/network-chaos.yaml spartan/aztec-chaos-scenarios/templates/network-shaping.yaml

+3-25
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ metadata:
77
name: {{ .Values.global.targetNamespace }}-latency
88
namespace: {{ .Values.global.chaosMeshNamespace }}
99
labels:
10-
{{- include "network-shaping.labels" . | nindent 4 }}
10+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
1111
annotations:
1212
"helm.sh/hook": post-install,post-upgrade
1313
"helm.sh/hook-weight": "0"
@@ -31,7 +31,7 @@ metadata:
3131
name: {{ .Values.global.targetNamespace }}-bandwidth
3232
namespace: {{ .Values.global.chaosMeshNamespace }}
3333
labels:
34-
{{- include "network-shaping.labels" . | nindent 4 }}
34+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
3535
annotations:
3636
"helm.sh/hook": post-install,post-upgrade
3737
"helm.sh/hook-weight": "0"
@@ -57,7 +57,7 @@ metadata:
5757
name: {{ .Values.global.targetNamespace }}-packet-loss
5858
namespace: {{ .Values.global.chaosMeshNamespace }}
5959
labels:
60-
{{- include "network-shaping.labels" . | nindent 4 }}
60+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
6161
annotations:
6262
"helm.sh/resource-policy": keep
6363
spec:
@@ -72,26 +72,4 @@ spec:
7272
duration: 8760h
7373
{{- end }}
7474

75-
{{- if .Values.networkShaping.conditions.killProvers.enabled }}
76-
---
77-
apiVersion: chaos-mesh.org/v1alpha1
78-
kind: PodChaos
79-
metadata:
80-
name: {{ .Values.global.targetNamespace }}-kill-provers
81-
namespace: {{ .Values.global.chaosMeshNamespace }}
82-
labels:
83-
{{- include "network-shaping.labels" . | nindent 4 }}
84-
annotations:
85-
"helm.sh/resource-policy": keep
86-
spec:
87-
action: pod-failure
88-
mode: all
89-
selector:
90-
namespaces:
91-
- {{ .Values.global.targetNamespace }}
92-
labelSelectors:
93-
app: prover-node
94-
duration: {{ .Values.networkShaping.conditions.killProvers.duration }}
9575
{{- end }}
96-
97-
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{{- if .Values.proverFailure.enabled }}
2+
---
3+
apiVersion: chaos-mesh.org/v1alpha1
4+
kind: PodChaos
5+
metadata:
6+
name: {{ .Values.global.targetNamespace }}-prover-failure
7+
namespace: {{ .Values.global.chaosMeshNamespace }}
8+
labels:
9+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
10+
annotations:
11+
"helm.sh/resource-policy": keep
12+
spec:
13+
action: pod-failure
14+
mode: all
15+
selector:
16+
namespaces:
17+
- {{ .Values.global.targetNamespace }}
18+
labelSelectors:
19+
app: prover-node
20+
duration: {{ .Values.proverFailure.duration }}
21+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{{- if .Values.validatorKill.enabled }}
2+
---
3+
apiVersion: chaos-mesh.org/v1alpha1
4+
kind: PodChaos
5+
metadata:
6+
name: {{ .Values.global.targetNamespace }}-validator-kill
7+
namespace: {{ .Values.global.chaosMeshNamespace }}
8+
labels:
9+
{{- include "aztec-chaos-scenarios.labels" . | nindent 4 }}
10+
annotations:
11+
"helm.sh/resource-policy": keep
12+
spec:
13+
action: pod-kill
14+
mode: fixed-percent
15+
value: {{ .Values.validatorKill.percent | quote }}
16+
selector:
17+
namespaces:
18+
- {{ .Values.global.targetNamespace }}
19+
labelSelectors:
20+
app: validator
21+
{{- end }}

spartan/network-shaping/values.yaml spartan/aztec-chaos-scenarios/values.yaml

+16-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
nameOverride: null
2+
fullnameOverride: null
3+
14
global:
25
# When deploying, override the namespace with the one spartan will deploy to; all chaos experiments will then be applied to all pods within that namespace
36
# run deployment with --set global.targetNamespace=your-namespace
@@ -7,12 +10,12 @@ global:
710
# Network shaping configuration
811
networkShaping:
912
# Master switch to enable network shaping
10-
enabled: true
13+
enabled: false
1114

1215
# Default settings
1316
defaultSettings:
1417
mode: all
15-
# Set duration to 1 year so the the experiment will run indefinitely unless overridden
18+
# Set duration to 1 year so the experiment will run indefinitely unless overridden
1619
duration: 8760h
1720

1821
# Network conditions to apply
@@ -62,9 +65,17 @@ networkShaping:
6265
# Buffer = smoother bandwidth restriction but higher memory usage
6366
buffer: 1000
6467

65-
killProvers:
66-
enabled: false
67-
duration: 13m
68+
proverFailure:
69+
enabled: false
70+
duration: 13m
71+
72+
validatorKill:
73+
enabled: false
74+
percent: 30
75+
76+
bootNodeFailure:
77+
enabled: false
78+
duration: 60m
6879
## Here are some exciting example configurations created by claude:
6980
# Example use cases for different configurations:
7081

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
global:
2+
namespace: "smoke"
3+
4+
bootNodeFailure:
5+
enabled: true
6+
duration: 60m

spartan/network-shaping/values/moderate.yaml spartan/aztec-chaos-scenarios/values/moderate.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ networkShaping:
2222
packetLoss:
2323
enabled: true
2424
loss: "0.5"
25-
correlation: "60"
25+
correlation: "60"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Imposes the network conditions that are stated as requirements for node operators
2+
global:
3+
namespace: "smoke"
4+
5+
networkShaping:
6+
enabled: true
7+
conditions:
8+
latency:
9+
enabled: true
10+
delay:
11+
# Regional network latency (e.g., cross-country)
12+
latency: 100ms
13+
jitter: 20ms
14+
correlation: "75"
15+
bandwidth:
16+
enabled: true
17+
rate: 250mbps
18+
limit: 125000000
19+
buffer: 25000
20+
packetLoss:
21+
enabled: true
22+
loss: "0.5"
23+
correlation: "60"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
global:
2+
namespace: "smoke"
3+
4+
proverFailure:
5+
enabled: true
6+
duration: 13m
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
global:
2+
namespace: "smoke"
3+
4+
validatorKill:
5+
enabled: true
6+
percent: 25

spartan/aztec-network/templates/_helpers.tpl

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ http://{{ include "aztec-network.fullname" . }}-boot-node-0.{{ include "aztec-ne
7878
{{- if .Values.validator.externalTcpHost -}}
7979
http://{{ .Values.validator.externalTcpHost }}:{{ .Values.validator.service.nodePort }}
8080
{{- else -}}
81-
http://{{ include "aztec-network.fullname" . }}-validator-0.{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }}
81+
http://{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.validator.service.nodePort }}
8282
{{- end -}}
8383
{{- end -}}
8484

spartan/aztec-network/templates/prover-node.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ spec:
1818
app: prover-node
1919
spec:
2020
initContainers:
21-
- name: wait-for-boot-node
21+
- name: wait-for-services
2222
image: {{ .Values.images.curl.image }}
2323
command:
2424
- /bin/sh

spartan/aztec-network/templates/pxe.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ spec:
4949
- name: ETHEREUM_HOST
5050
value: {{ include "aztec-network.ethereumHost" . | quote }}
5151
- name: AZTEC_NODE_URL
52-
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
52+
value: {{ include "aztec-network.validatorUrl" . | quote }}
5353
- name: LOG_JSON
5454
value: "1"
5555
- name: LOG_LEVEL

spartan/aztec-network/templates/setup-l2-contracts.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ spec:
3232
echo "L2 contracts initialized"
3333
env:
3434
- name: PXE_URL
35-
value: {{ include "aztec-network.pxeUrl" . | quote }}
35+
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
3636
- name: DEBUG
3737
value: "aztec:*"
3838
- name: LOG_LEVEL

spartan/aztec-network/templates/transaction-bot.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ spec:
2525
{{- if .Values.bot.nodeUrl }}
2626
value: "{{ .Values.bot.nodeUrl }}"
2727
{{- else }}
28-
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
28+
value: {{ include "aztec-network.validatorUrl" . | quote }}
2929
{{- end }}
3030
command:
3131
- /bin/sh
@@ -47,7 +47,7 @@ spec:
4747
{{- if .Values.bot.nodeUrl }}
4848
value: "{{ .Values.bot.nodeUrl }}"
4949
{{- else }}
50-
value: {{ include "aztec-network.bootNodeUrl" . | quote }}
50+
value: {{ include "aztec-network.validatorUrl" . | quote }}
5151
{{- end }}
5252
- name: LOG_JSON
5353
value: "1"

0 commit comments

Comments
 (0)