Skip to content

Commit 4d1980f

Browse files
Scotttekton-robot
Scott
authored andcommitted
Enforce Default TaskRun Timeout
The TaskRun spec describes a default timeout of ten minutes but this isn't documented anywhere else or enforced by the TaskRun reconciler. This commit: - updates the docs for TaskRuns to make the default timeout clear for users - changes the taskrun reconciler to use the default timeout if one isn't specified in the TaskRun yaml - adds a test to check whether the taskrun reconciler's checkTimeout() function is failing taskruns running longer than 10 minutes
1 parent c65e64e commit 4d1980f

File tree

4 files changed

+89
-60
lines changed

4 files changed

+89
-60
lines changed

docs/taskruns.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ following fields:
4949
- [`inputs`] - Specifies [input parameters](#input-parameters) and
5050
[input resources](#providing-resources)
5151
- [`outputs`] - Specifies [output resources](#providing-resources)
52-
- `timeout` - Specifies timeout after which the `TaskRun` will fail.
52+
- `timeout` - Specifies timeout after which the `TaskRun` will fail. Defaults
53+
to ten minutes.
5354
- [`nodeSelector`] - a selector which must be true for the pod to fit on a
5455
node. The selector which must match a node's labels for the pod to be
5556
scheduled on that node. More info:

pkg/reconciler/timeout_handler.go

+6-3
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,10 @@ func (t *TimeoutSet) getOrCreateFinishedChan(runObj StatusKey) chan bool {
9090
return finished
9191
}
9292

93-
func getTimeout(d *metav1.Duration) time.Duration {
93+
// GetTimeout takes a kubernetes Duration representing the timeout period for a
94+
// resource and returns it as a time.Duration. If the provided duration is nil
95+
// then fallback behaviour is to return a default timeout period.
96+
func GetTimeout(d *metav1.Duration) time.Duration {
9497
timeout := defaultTimeout
9598
if d != nil {
9699
timeout = d.Duration
@@ -154,14 +157,14 @@ func (t *TimeoutSet) checkTaskRunTimeouts(namespace string) {
154157
// 1. Stop signal, 2. TaskRun to complete or 3. Taskrun to time out, which is
155158
// determined by checking if the tr's timeout has occurred since the startTime
156159
func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun, startTime *metav1.Time) {
157-
t.waitRun(tr, getTimeout(tr.Spec.Timeout), startTime, t.taskRunCallbackFunc)
160+
t.waitRun(tr, GetTimeout(tr.Spec.Timeout), startTime, t.taskRunCallbackFunc)
158161
}
159162

160163
// WaitPipelineRun function creates a blocking function for pipelinerun to wait for
161164
// 1. Stop signal, 2. pipelinerun to complete or 3. pipelinerun to time out which is
162165
// determined by checking if the tr's timeout has occurred since the startTime
163166
func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun, startTime *metav1.Time) {
164-
t.waitRun(pr, getTimeout(pr.Spec.Timeout), startTime, t.pipelineRunCallbackFunc)
167+
t.waitRun(pr, GetTimeout(pr.Spec.Timeout), startTime, t.pipelineRunCallbackFunc)
165168
}
166169

167170
func (t *TimeoutSet) waitRun(runObj StatusKey, timeout time.Duration, startTime *metav1.Time, callback func(interface{})) {

pkg/reconciler/v1alpha1/taskrun/taskrun.go

+19-22
Original file line numberDiff line numberDiff line change
@@ -535,30 +535,27 @@ func (c *Reconciler) checkTimeout(tr *v1alpha1.TaskRun, ts *v1alpha1.TaskSpec, d
535535
return false, nil
536536
}
537537

538-
if tr.Spec.Timeout != nil {
539-
timeout := tr.Spec.Timeout.Duration
540-
runtime := time.Since(tr.Status.StartTime.Time)
541-
542-
c.Logger.Infof("Checking timeout for TaskRun %q (startTime %s, timeout %s, runtime %s)", tr.Name, tr.Status.StartTime, timeout, runtime)
543-
if runtime > timeout {
544-
c.Logger.Infof("TaskRun %q is timeout (runtime %s over %s), deleting pod", tr.Name, runtime, timeout)
545-
if err := dp(tr.Status.PodName, &metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
546-
c.Logger.Errorf("Failed to terminate pod: %v", err)
547-
return true, err
548-
}
538+
timeout := reconciler.GetTimeout(tr.Spec.Timeout)
539+
runtime := time.Since(tr.Status.StartTime.Time)
540+
c.Logger.Infof("Checking timeout for TaskRun %q (startTime %s, timeout %s, runtime %s)", tr.Name, tr.Status.StartTime, timeout, runtime)
541+
if runtime > timeout {
542+
c.Logger.Infof("TaskRun %q is timeout (runtime %s over %s), deleting pod", tr.Name, runtime, timeout)
543+
if err := dp(tr.Status.PodName, &metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
544+
c.Logger.Errorf("Failed to terminate pod: %v", err)
545+
return true, err
546+
}
549547

550-
timeoutMsg := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, timeout.String())
551-
tr.Status.SetCondition(&apis.Condition{
552-
Type: apis.ConditionSucceeded,
553-
Status: corev1.ConditionFalse,
554-
Reason: reasonTimedOut,
555-
Message: timeoutMsg,
556-
})
557-
// update tr completed time
558-
tr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
548+
timeoutMsg := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, timeout.String())
549+
tr.Status.SetCondition(&apis.Condition{
550+
Type: apis.ConditionSucceeded,
551+
Status: corev1.ConditionFalse,
552+
Reason: reasonTimedOut,
553+
Message: timeoutMsg,
554+
})
555+
// update tr completed time
556+
tr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
559557

560-
return true, nil
561-
}
558+
return true, nil
562559
}
563560
return false, nil
564561
}

pkg/reconciler/v1alpha1/taskrun/taskrun_test.go

+62-34
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ func TestReconcileOnCompletedTaskRun(t *testing.T) {
12891289
if err != nil {
12901290
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err)
12911291
}
1292-
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), taskSt, ignoreLastTransitionTime); d != "" {
1292+
if d := cmp.Diff(taskSt, newTr.Status.GetCondition(apis.ConditionSucceeded), ignoreLastTransitionTime); d != "" {
12931293
t.Fatalf("-want, +got: %v", d)
12941294
}
12951295
}
@@ -1325,47 +1325,75 @@ func TestReconcileOnCancelledTaskRun(t *testing.T) {
13251325
Reason: "TaskRunCancelled",
13261326
Message: `TaskRun "test-taskrun-run-cancelled" was cancelled`,
13271327
}
1328-
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), expectedStatus, ignoreLastTransitionTime); d != "" {
1328+
if d := cmp.Diff(expectedStatus, newTr.Status.GetCondition(apis.ConditionSucceeded), ignoreLastTransitionTime); d != "" {
13291329
t.Fatalf("-want, +got: %v", d)
13301330
}
13311331
}
13321332

1333-
func TestReconcileOnTimedOutTaskRun(t *testing.T) {
1334-
taskRun := tb.TaskRun("test-taskrun-timeout", "foo",
1335-
tb.TaskRunSpec(
1336-
tb.TaskRunTaskRef(simpleTask.Name),
1337-
tb.TaskRunTimeout(10*time.Second),
1338-
),
1339-
tb.TaskRunStatus(tb.Condition(apis.Condition{
1340-
Type: apis.ConditionSucceeded,
1341-
Status: corev1.ConditionUnknown}),
1342-
tb.TaskRunStartTime(time.Now().Add(-15*time.Second))))
1343-
1344-
d := test.Data{
1345-
TaskRuns: []*v1alpha1.TaskRun{taskRun},
1346-
Tasks: []*v1alpha1.Task{simpleTask},
1333+
func TestReconcileTimeouts(t *testing.T) {
1334+
type testCase struct {
1335+
taskRun *v1alpha1.TaskRun
1336+
expectedStatus *apis.Condition
13471337
}
13481338

1349-
testAssets := getTaskRunController(t, d)
1350-
c := testAssets.Controller
1351-
clients := testAssets.Clients
1352-
1353-
if err := c.Reconciler.Reconcile(context.Background(), fmt.Sprintf("%s/%s", taskRun.Namespace, taskRun.Name)); err != nil {
1354-
t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err)
1355-
}
1356-
newTr, err := clients.Pipeline.TektonV1alpha1().TaskRuns(taskRun.Namespace).Get(taskRun.Name, metav1.GetOptions{})
1357-
if err != nil {
1358-
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err)
1339+
testcases := []testCase{
1340+
{
1341+
taskRun: tb.TaskRun("test-taskrun-timeout", "foo",
1342+
tb.TaskRunSpec(
1343+
tb.TaskRunTaskRef(simpleTask.Name),
1344+
tb.TaskRunTimeout(10*time.Second),
1345+
),
1346+
tb.TaskRunStatus(tb.Condition(apis.Condition{
1347+
Type: apis.ConditionSucceeded,
1348+
Status: corev1.ConditionUnknown}),
1349+
tb.TaskRunStartTime(time.Now().Add(-15*time.Second)))),
1350+
1351+
expectedStatus: &apis.Condition{
1352+
Type: apis.ConditionSucceeded,
1353+
Status: corev1.ConditionFalse,
1354+
Reason: "TaskRunTimeout",
1355+
Message: `TaskRun "test-taskrun-timeout" failed to finish within "10s"`,
1356+
},
1357+
},
1358+
{
1359+
taskRun: tb.TaskRun("test-taskrun-default-timeout-10-minutes", "foo",
1360+
tb.TaskRunSpec(
1361+
tb.TaskRunTaskRef(simpleTask.Name),
1362+
),
1363+
tb.TaskRunStatus(tb.Condition(apis.Condition{
1364+
Type: apis.ConditionSucceeded,
1365+
Status: corev1.ConditionUnknown}),
1366+
tb.TaskRunStartTime(time.Now().Add(-11*time.Minute)))),
1367+
1368+
expectedStatus: &apis.Condition{
1369+
Type: apis.ConditionSucceeded,
1370+
Status: corev1.ConditionFalse,
1371+
Reason: "TaskRunTimeout",
1372+
Message: `TaskRun "test-taskrun-default-timeout-10-minutes" failed to finish within "10m0s"`,
1373+
},
1374+
},
13591375
}
13601376

1361-
expectedStatus := &apis.Condition{
1362-
Type: apis.ConditionSucceeded,
1363-
Status: corev1.ConditionFalse,
1364-
Reason: "TaskRunTimeout",
1365-
Message: `TaskRun "test-taskrun-timeout" failed to finish within "10s"`,
1366-
}
1367-
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), expectedStatus, ignoreLastTransitionTime); d != "" {
1368-
t.Fatalf("-want, +got: %v", d)
1377+
for _, tc := range testcases {
1378+
d := test.Data{
1379+
TaskRuns: []*v1alpha1.TaskRun{tc.taskRun},
1380+
Tasks: []*v1alpha1.Task{simpleTask},
1381+
}
1382+
testAssets := getTaskRunController(t, d)
1383+
c := testAssets.Controller
1384+
clients := testAssets.Clients
1385+
1386+
if err := c.Reconciler.Reconcile(context.Background(), fmt.Sprintf("%s/%s", tc.taskRun.Namespace, tc.taskRun.Name)); err != nil {
1387+
t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err)
1388+
}
1389+
newTr, err := clients.Pipeline.TektonV1alpha1().TaskRuns(tc.taskRun.Namespace).Get(tc.taskRun.Name, metav1.GetOptions{})
1390+
if err != nil {
1391+
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", tc.taskRun.Name, err)
1392+
}
1393+
condition := newTr.Status.GetCondition(apis.ConditionSucceeded)
1394+
if d := cmp.Diff(tc.expectedStatus, condition, ignoreLastTransitionTime); d != "" {
1395+
t.Fatalf("-want, +got: %v", d)
1396+
}
13691397
}
13701398
}
13711399

0 commit comments

Comments
 (0)