Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enforce Default TaskRun Timeout #871

Merged
merged 1 commit into from May 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/taskruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ following fields:
- [`inputs`] - Specifies [input parameters](#input-parameters) and
[input resources](#providing-resources)
- [`outputs`] - Specifies [output resources](#providing-resources)
- `timeout` - Specifies timeout after which the `TaskRun` will fail.
- `timeout` - Specifies timeout after which the `TaskRun` will fail. Defaults
to ten minutes.
- [`nodeSelector`] - a selector which must be true for the pod to fit on a
node. The selector which must match a node's labels for the pod to be
scheduled on that node. More info:
Expand Down
9 changes: 6 additions & 3 deletions pkg/reconciler/timeout_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ func (t *TimeoutSet) getOrCreateFinishedChan(runObj StatusKey) chan bool {
return finished
}

func getTimeout(d *metav1.Duration) time.Duration {
// GetTimeout takes a kubernetes Duration representing the timeout period for a
// resource and returns it as a time.Duration. If the provided duration is nil
// then fallback behaviour is to return a default timeout period.
func GetTimeout(d *metav1.Duration) time.Duration {
timeout := defaultTimeout
if d != nil {
timeout = d.Duration
Expand Down Expand Up @@ -154,14 +157,14 @@ func (t *TimeoutSet) checkTaskRunTimeouts(namespace string) {
// 1. Stop signal, 2. TaskRun to complete or 3. Taskrun to time out, which is
// determined by checking if the tr's timeout has occurred since the startTime
func (t *TimeoutSet) WaitTaskRun(tr *v1alpha1.TaskRun, startTime *metav1.Time) {
t.waitRun(tr, getTimeout(tr.Spec.Timeout), startTime, t.taskRunCallbackFunc)
t.waitRun(tr, GetTimeout(tr.Spec.Timeout), startTime, t.taskRunCallbackFunc)
}

// WaitPipelineRun function creates a blocking function for pipelinerun to wait for
// 1. Stop signal, 2. pipelinerun to complete or 3. pipelinerun to time out which is
// determined by checking if the tr's timeout has occurred since the startTime
func (t *TimeoutSet) WaitPipelineRun(pr *v1alpha1.PipelineRun, startTime *metav1.Time) {
t.waitRun(pr, getTimeout(pr.Spec.Timeout), startTime, t.pipelineRunCallbackFunc)
t.waitRun(pr, GetTimeout(pr.Spec.Timeout), startTime, t.pipelineRunCallbackFunc)
}

func (t *TimeoutSet) waitRun(runObj StatusKey, timeout time.Duration, startTime *metav1.Time, callback func(interface{})) {
Expand Down
41 changes: 19 additions & 22 deletions pkg/reconciler/v1alpha1/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,30 +535,27 @@ func (c *Reconciler) checkTimeout(tr *v1alpha1.TaskRun, ts *v1alpha1.TaskSpec, d
return false, nil
}

if tr.Spec.Timeout != nil {
timeout := tr.Spec.Timeout.Duration
runtime := time.Since(tr.Status.StartTime.Time)

c.Logger.Infof("Checking timeout for TaskRun %q (startTime %s, timeout %s, runtime %s)", tr.Name, tr.Status.StartTime, timeout, runtime)
if runtime > timeout {
c.Logger.Infof("TaskRun %q is timeout (runtime %s over %s), deleting pod", tr.Name, runtime, timeout)
if err := dp(tr.Status.PodName, &metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
c.Logger.Errorf("Failed to terminate pod: %v", err)
return true, err
}
timeout := reconciler.GetTimeout(tr.Spec.Timeout)
runtime := time.Since(tr.Status.StartTime.Time)
c.Logger.Infof("Checking timeout for TaskRun %q (startTime %s, timeout %s, runtime %s)", tr.Name, tr.Status.StartTime, timeout, runtime)
if runtime > timeout {
c.Logger.Infof("TaskRun %q is timeout (runtime %s over %s), deleting pod", tr.Name, runtime, timeout)
if err := dp(tr.Status.PodName, &metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
c.Logger.Errorf("Failed to terminate pod: %v", err)
return true, err
}

timeoutMsg := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, timeout.String())
tr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: reasonTimedOut,
Message: timeoutMsg,
})
// update tr completed time
tr.Status.CompletionTime = &metav1.Time{Time: time.Now()}
timeoutMsg := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, timeout.String())
tr.Status.SetCondition(&apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: reasonTimedOut,
Message: timeoutMsg,
})
// update tr completed time
tr.Status.CompletionTime = &metav1.Time{Time: time.Now()}

return true, nil
}
return true, nil
}
return false, nil
}
96 changes: 62 additions & 34 deletions pkg/reconciler/v1alpha1/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1289,7 +1289,7 @@ func TestReconcileOnCompletedTaskRun(t *testing.T) {
if err != nil {
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err)
}
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), taskSt, ignoreLastTransitionTime); d != "" {
if d := cmp.Diff(taskSt, newTr.Status.GetCondition(apis.ConditionSucceeded), ignoreLastTransitionTime); d != "" {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tweaked this and line 1328 because the -want, +got results were backwards.

t.Fatalf("-want, +got: %v", d)
}
}
Expand Down Expand Up @@ -1325,47 +1325,75 @@ func TestReconcileOnCancelledTaskRun(t *testing.T) {
Reason: "TaskRunCancelled",
Message: `TaskRun "test-taskrun-run-cancelled" was cancelled`,
}
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), expectedStatus, ignoreLastTransitionTime); d != "" {
if d := cmp.Diff(expectedStatus, newTr.Status.GetCondition(apis.ConditionSucceeded), ignoreLastTransitionTime); d != "" {
t.Fatalf("-want, +got: %v", d)
}
}

func TestReconcileOnTimedOutTaskRun(t *testing.T) {
taskRun := tb.TaskRun("test-taskrun-timeout", "foo",
tb.TaskRunSpec(
tb.TaskRunTaskRef(simpleTask.Name),
tb.TaskRunTimeout(10*time.Second),
),
tb.TaskRunStatus(tb.Condition(apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown}),
tb.TaskRunStartTime(time.Now().Add(-15*time.Second))))

d := test.Data{
TaskRuns: []*v1alpha1.TaskRun{taskRun},
Tasks: []*v1alpha1.Task{simpleTask},
func TestReconcileTimeouts(t *testing.T) {
type testCase struct {
taskRun *v1alpha1.TaskRun
expectedStatus *apis.Condition
}

testAssets := getTaskRunController(t, d)
c := testAssets.Controller
clients := testAssets.Clients

if err := c.Reconciler.Reconcile(context.Background(), fmt.Sprintf("%s/%s", taskRun.Namespace, taskRun.Name)); err != nil {
t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err)
}
newTr, err := clients.Pipeline.TektonV1alpha1().TaskRuns(taskRun.Namespace).Get(taskRun.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err)
testcases := []testCase{
{
taskRun: tb.TaskRun("test-taskrun-timeout", "foo",
tb.TaskRunSpec(
tb.TaskRunTaskRef(simpleTask.Name),
tb.TaskRunTimeout(10*time.Second),
),
tb.TaskRunStatus(tb.Condition(apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown}),
tb.TaskRunStartTime(time.Now().Add(-15*time.Second)))),

expectedStatus: &apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "TaskRunTimeout",
Message: `TaskRun "test-taskrun-timeout" failed to finish within "10s"`,
},
},
{
taskRun: tb.TaskRun("test-taskrun-default-timeout-10-minutes", "foo",
tb.TaskRunSpec(
tb.TaskRunTaskRef(simpleTask.Name),
),
tb.TaskRunStatus(tb.Condition(apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionUnknown}),
tb.TaskRunStartTime(time.Now().Add(-11*time.Minute)))),

expectedStatus: &apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "TaskRunTimeout",
Message: `TaskRun "test-taskrun-default-timeout-10-minutes" failed to finish within "10m0s"`,
},
},
}

expectedStatus := &apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "TaskRunTimeout",
Message: `TaskRun "test-taskrun-timeout" failed to finish within "10s"`,
}
if d := cmp.Diff(newTr.Status.GetCondition(apis.ConditionSucceeded), expectedStatus, ignoreLastTransitionTime); d != "" {
t.Fatalf("-want, +got: %v", d)
for _, tc := range testcases {
d := test.Data{
TaskRuns: []*v1alpha1.TaskRun{tc.taskRun},
Tasks: []*v1alpha1.Task{simpleTask},
}
testAssets := getTaskRunController(t, d)
c := testAssets.Controller
clients := testAssets.Clients

if err := c.Reconciler.Reconcile(context.Background(), fmt.Sprintf("%s/%s", tc.taskRun.Namespace, tc.taskRun.Name)); err != nil {
t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err)
}
newTr, err := clients.Pipeline.TektonV1alpha1().TaskRuns(tc.taskRun.Namespace).Get(tc.taskRun.Name, metav1.GetOptions{})
if err != nil {
t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", tc.taskRun.Name, err)
}
condition := newTr.Status.GetCondition(apis.ConditionSucceeded)
if d := cmp.Diff(tc.expectedStatus, condition, ignoreLastTransitionTime); d != "" {
t.Fatalf("-want, +got: %v", d)
}
}
}

Expand Down