Skip to content

Commit 9f964d6

Browse files
committed
Improve the failure mode of timeout_test.
I notice that the failure mode for `timeout_test.go` tests on flakes seems to often be hitting the Go `-timeout=X` limit, which means that without `-v` you get no logs for the test. This change makes the tests use a context with a timeout, and makes the various `Wait` functions check `context.Done()` and return `context.Err()` to support the timeout terminating the test earlier than the above and producing logs (other than an ugly panic!).
1 parent d876923 commit 9f964d6

File tree

5 files changed

+81
-35
lines changed

5 files changed

+81
-35
lines changed

test/init_test.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,15 @@ func setup(ctx context.Context, t *testing.T, fn ...func(context.Context, *testi
7272
return c, namespace
7373
}
7474

75-
func header(logf logging.FormatLogger, text string) {
75+
func header(t *testing.T, text string) {
76+
t.Helper()
7677
left := "### "
7778
right := " ###"
7879
txt := left + text + right
7980
bar := strings.Repeat("#", len(txt))
80-
logf(bar)
81-
logf(txt)
82-
logf(bar)
81+
t.Logf(bar)
82+
t.Logf(txt)
83+
t.Logf(bar)
8384
}
8485

8586
func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string) {
@@ -88,14 +89,14 @@ func tearDown(ctx context.Context, t *testing.T, cs *clients, namespace string)
8889
return
8990
}
9091
if t.Failed() {
91-
header(t.Logf, fmt.Sprintf("Dumping objects from %s", namespace))
92+
header(t, fmt.Sprintf("Dumping objects from %s", namespace))
9293
bs, err := getCRDYaml(ctx, cs, namespace)
9394
if err != nil {
9495
t.Error(err)
9596
} else {
9697
t.Log(string(bs))
9798
}
98-
header(t.Logf, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
99+
header(t, fmt.Sprintf("Dumping logs from Pods in the %s", namespace))
99100
taskruns, err := cs.TaskRunClient.List(ctx, metav1.ListOptions{})
100101
if err != nil {
101102
t.Errorf("Error getting TaskRun list %s", err)

test/timeout_test.go

+15-17
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,14 @@ import (
3838
// verify that pipelinerun timeout works and leads to the correct TaskRun statuses
3939
// and pod deletions.
4040
func TestPipelineRunTimeout(t *testing.T) {
41-
ctx := context.Background()
42-
ctx, cancel := context.WithCancel(ctx)
41+
// cancel the context after we have waited a suitable buffer beyond the given deadline.
42+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
4343
defer cancel()
4444
c, namespace := setup(ctx, t)
4545
t.Parallel()
4646

47-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
48-
defer tearDown(ctx, t, c, namespace)
47+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
48+
defer tearDown(context.Background(), t, c, namespace)
4949

5050
t.Logf("Creating Task in namespace %s", namespace)
5151
task := &v1beta1.Task{
@@ -171,14 +171,13 @@ func TestPipelineRunTimeout(t *testing.T) {
171171

172172
// TestStepTimeout is an integration test that will verify a Step can be timed out.
173173
func TestStepTimeout(t *testing.T) {
174-
ctx := context.Background()
175-
ctx, cancel := context.WithCancel(ctx)
174+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
176175
defer cancel()
177176
c, namespace := setup(ctx, t)
178177
t.Parallel()
179178

180-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
181-
defer tearDown(ctx, t, c, namespace)
179+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
180+
defer tearDown(context.Background(), t, c, namespace)
182181

183182
t.Logf("Creating Task with Step step-no-timeout, Step step-timeout, and Step step-canceled in namespace %s", namespace)
184183

@@ -243,14 +242,14 @@ func TestStepTimeout(t *testing.T) {
243242

244243
// TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
245244
func TestTaskRunTimeout(t *testing.T) {
246-
ctx := context.Background()
247-
ctx, cancel := context.WithCancel(ctx)
245+
timeout := 30 * time.Second
246+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
248247
defer cancel()
249248
c, namespace := setup(ctx, t)
250249
t.Parallel()
251250

252-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
253-
defer tearDown(ctx, t, c, namespace)
251+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
252+
defer tearDown(context.Background(), t, c, namespace)
254253

255254
t.Logf("Creating Task and TaskRun in namespace %s", namespace)
256255
task := &v1beta1.Task{
@@ -272,7 +271,7 @@ func TestTaskRunTimeout(t *testing.T) {
272271
TaskRef: &v1beta1.TaskRef{Name: "giraffe"},
273272
// Do not reduce this timeout. Taskrun e2e test is also verifying
274273
// if reconcile is triggered from timeout handler and not by pod informers
275-
Timeout: &metav1.Duration{Duration: 30 * time.Second},
274+
Timeout: &metav1.Duration{Duration: timeout},
276275
},
277276
}
278277
if _, err := c.TaskRunClient.Create(ctx, taskRun, metav1.CreateOptions{}); err != nil {
@@ -300,14 +299,13 @@ func TestTaskRunTimeout(t *testing.T) {
300299
}
301300

302301
func TestPipelineTaskTimeout(t *testing.T) {
303-
ctx := context.Background()
304-
ctx, cancel := context.WithCancel(ctx)
302+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
305303
defer cancel()
306304
c, namespace := setup(ctx, t)
307305
t.Parallel()
308306

309-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
310-
defer tearDown(ctx, t, c, namespace)
307+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
308+
defer tearDown(context.Background(), t, c, namespace)
311309

312310
t.Logf("Creating Tasks in namespace %s", namespace)
313311
task1 := &v1beta1.Task{

test/v1alpha1/timeout_test.go

+9-12
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,13 @@ import (
3737
// verify that pipelinerun timeout works and leads to the correct TaskRun statuses
3838
// and pod deletions.
3939
func TestPipelineRunTimeout(t *testing.T) {
40-
ctx := context.Background()
41-
ctx, cancel := context.WithCancel(ctx)
40+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
4241
defer cancel()
4342
c, namespace := setup(ctx, t)
4443
t.Parallel()
4544

46-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
47-
defer tearDown(ctx, t, c, namespace)
45+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
46+
defer tearDown(context.Background(), t, c, namespace)
4847

4948
t.Logf("Creating Task in namespace %s", namespace)
5049
task := tb.Task("banana", tb.TaskSpec(
@@ -140,14 +139,13 @@ func TestPipelineRunTimeout(t *testing.T) {
140139

141140
// TestTaskRunTimeout is an integration test that will verify a TaskRun can be timed out.
142141
func TestTaskRunTimeout(t *testing.T) {
143-
ctx := context.Background()
144-
ctx, cancel := context.WithCancel(ctx)
142+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
145143
defer cancel()
146144
c, namespace := setup(ctx, t)
147145
t.Parallel()
148146

149-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
150-
defer tearDown(ctx, t, c, namespace)
147+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
148+
defer tearDown(context.Background(), t, c, namespace)
151149

152150
t.Logf("Creating Task and TaskRun in namespace %s", namespace)
153151
if _, err := c.TaskClient.Create(ctx, tb.Task("giraffe",
@@ -168,14 +166,13 @@ func TestTaskRunTimeout(t *testing.T) {
168166
}
169167

170168
func TestPipelineTaskTimeout(t *testing.T) {
171-
ctx := context.Background()
172-
ctx, cancel := context.WithCancel(ctx)
169+
ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute)
173170
defer cancel()
174171
c, namespace := setup(ctx, t)
175172
t.Parallel()
176173

177-
knativetest.CleanupOnInterrupt(func() { tearDown(ctx, t, c, namespace) }, t.Logf)
178-
defer tearDown(ctx, t, c, namespace)
174+
knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf)
175+
defer tearDown(context.Background(), t, c, namespace)
179176

180177
t.Logf("Creating Tasks in namespace %s", namespace)
181178
task1 := tb.Task("success", tb.TaskSpec(

test/v1alpha1/wait.go

+25
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
7575
defer span.End()
7676

7777
return wait.PollImmediate(interval, timeout, func() (bool, error) {
78+
select {
79+
case <-ctx.Done():
80+
return true, ctx.Err()
81+
default:
82+
}
7883
r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
7984
if err != nil {
8085
return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
9398
defer span.End()
9499

95100
return wait.PollImmediate(interval, timeout, func() (bool, error) {
101+
select {
102+
case <-ctx.Done():
103+
return true, ctx.Err()
104+
default:
105+
}
96106
d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
97107
if err != nil {
98108
return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
111121
defer span.End()
112122

113123
return wait.PollImmediate(interval, timeout, func() (bool, error) {
124+
select {
125+
case <-ctx.Done():
126+
return true, ctx.Err()
127+
default:
128+
}
114129
r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
115130
if err != nil {
116131
return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
129144
defer span.End()
130145

131146
return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
147+
select {
148+
case <-ctx.Done():
149+
return true, ctx.Err()
150+
default:
151+
}
132152
r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
133153
if err != nil {
134154
return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
147167
defer span.End()
148168

149169
return wait.PollImmediate(interval, timeout, func() (bool, error) {
170+
select {
171+
case <-ctx.Done():
172+
return true, ctx.Err()
173+
default:
174+
}
150175
r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
151176
if err != nil {
152177
return true, err

test/wait.go

+25
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ func WaitForTaskRunState(ctx context.Context, c *clients, name string, inState C
7575
defer span.End()
7676

7777
return wait.PollImmediate(interval, timeout, func() (bool, error) {
78+
select {
79+
case <-ctx.Done():
80+
return true, ctx.Err()
81+
default:
82+
}
7883
r, err := c.TaskRunClient.Get(ctx, name, metav1.GetOptions{})
7984
if err != nil {
8085
return true, err
@@ -93,6 +98,11 @@ func WaitForDeploymentState(ctx context.Context, c *clients, name string, namesp
9398
defer span.End()
9499

95100
return wait.PollImmediate(interval, timeout, func() (bool, error) {
101+
select {
102+
case <-ctx.Done():
103+
return true, ctx.Err()
104+
default:
105+
}
96106
d, err := c.KubeClient.Kube.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
97107
if err != nil {
98108
return true, err
@@ -111,6 +121,11 @@ func WaitForPodState(ctx context.Context, c *clients, name string, namespace str
111121
defer span.End()
112122

113123
return wait.PollImmediate(interval, timeout, func() (bool, error) {
124+
select {
125+
case <-ctx.Done():
126+
return true, ctx.Err()
127+
default:
128+
}
114129
r, err := c.KubeClient.Kube.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
115130
if err != nil {
116131
return true, err
@@ -129,6 +144,11 @@ func WaitForPipelineRunState(ctx context.Context, c *clients, name string, pollt
129144
defer span.End()
130145

131146
return wait.PollImmediate(interval, polltimeout, func() (bool, error) {
147+
select {
148+
case <-ctx.Done():
149+
return true, ctx.Err()
150+
default:
151+
}
132152
r, err := c.PipelineRunClient.Get(ctx, name, metav1.GetOptions{})
133153
if err != nil {
134154
return true, err
@@ -147,6 +167,11 @@ func WaitForServiceExternalIPState(ctx context.Context, c *clients, namespace, n
147167
defer span.End()
148168

149169
return wait.PollImmediate(interval, timeout, func() (bool, error) {
170+
select {
171+
case <-ctx.Done():
172+
return true, ctx.Err()
173+
default:
174+
}
150175
r, err := c.KubeClient.Kube.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
151176
if err != nil {
152177
return true, err

0 commit comments

Comments
 (0)