Commit 9425e01

bobcatfish authored and knative-prow-robot committed
Remove extra log PVC
We noticed early on that logs from init containers are often cleaned up immediately by k8s, particularly if the containers are short running (e.g. just echoing "hello world"). We started down a path to correct that, which takes an approach based on Prow's entrypoint solution (https://github.com/kubernetes/test-infra/tree/master/prow/cmd/entrypoint) (even using the same image at the moment!), which wraps the user's provided command and streams logs to a volume, from which the logs can be uploaded/streamed by a sidecar.

Since we are using init containers for step execution, we can't yet use sidecars, but we are addressing that in #224 (also an entrypoint-rewriting based solution). Once we have that, we can add sidecar support, starting with GCS as a POC (#107) and moving on to other types.

In the meantime, to enable us to get logs (particularly in tests), we had the taskrun controller create a PVC on the fly to hold logs. This has two problems:

* The PVCs are not cleaned up, so this is an unexpected side effect for users
* Combined with PVC-based input + output linking, this causes scheduling problems for the resulting pods (#375)

Now that we want to have an official release, this would be a bad state to release in, so we will remove this magical log PVC creation logic, which was never our intended end state anyway.

Since we _do_ need the entrypoint rewriting and log interception logic in the long run, this commit leaves most functionality intact, removing only the PVC creation and changing the volume being used to an `emptyDir`, which is what we will likely use for #107 (and this is how Prow handles this as well). This means the released functionality will be streaming logs to a location where nothing can read them, however I think it is better than completely removing the functionality b/c:

1. We need the functionality in the long run
2. Users should be prepared for this functionality (e.g. dealing with edge cases around the taskrun controller being able to fetch an image's entrypoint)

Fixes #387
1 parent 23a2b91 commit 9425e01

11 files changed (+189 -390 lines)
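To make the approach described in the commit message more concrete, below is a minimal, hypothetical sketch of what an entrypoint wrapper like Prow's does: it runs the user's original command and tees its output to a file on a shared volume (the volume that this commit changes from a PVC to an `emptyDir`). The `/tools/step.log` path and the hard-coded command are illustrative assumptions only, not the project's actual entrypoint binary.

```go
// Hypothetical sketch of an entrypoint wrapper: run the real command and tee
// its output to a file on a shared volume. Paths and the command are invented
// for illustration; the real binary is Prow's entrypoint image.
package main

import (
	"io"
	"os"
	"os/exec"
)

func main() {
	// The wrapped command would normally be injected by the controller after it
	// rewrites the step's entrypoint; hard-coded here for illustration.
	cmd := exec.Command("/bin/echo", "hello world")

	// /tools is assumed to be the mount point of the shared log volume
	// (a PVC before this commit, an emptyDir after it).
	logFile, err := os.Create("/tools/step.log")
	if err != nil {
		panic(err)
	}
	defer logFile.Close()

	// Stream the step's output both to the pod's own stdout/stderr and to the
	// shared volume, so a sidecar could later upload it (#107).
	cmd.Stdout = io.MultiWriter(os.Stdout, logFile)
	cmd.Stderr = io.MultiWriter(os.Stderr, logFile)

	if err := cmd.Run(); err != nil {
		os.Exit(1)
	}
}
```

Once #224 lands, a sidecar could read `/tools/step.log` from that shared volume and upload it, starting with GCS (#107).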

docs/using.md

+8 -3

@@ -123,9 +123,14 @@ specific contract.
 #### Entrypoint
 
 When containers are run in a `Task`, the `entrypoint` of the container will be
-overwritten with a custom binary that redirects the logs to a separate location
-for aggregating the log output. As such, it is always recommended to explicitly
-specify a command.
+overwritten with a custom binary. The plan is to use this custom binary for
+controlling the execution of step containers ([#224](https://github.com/knative/build-pipeline/issues/224)) and log streaming
+[#107](https://github.com/knative/build-pipeline/issues/107), though currently
+it will write logs only to an [`emptyDir`](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir)
+(which cannot be read from after the pod has finished executing, so logs must be obtained
+[via k8s logs](https://kubernetes.io/docs/concepts/cluster-administration/logging/),
+using a tool such as [test/logs/README.md](../test/logs/README.md),
+or setting up an external system to consume logs).
 
 When `command` is not explicitly set, the controller will attempt to lookup the
 entrypoint from the remote registry.
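Because the `emptyDir` volume disappears with the pod, the docs above point users at the Kubernetes logs API instead. As a hedged sketch (not part of this commit), the snippet below shows one way to read a step container's logs with client-go. The pod and container names are made up, and the `Stream(ctx)` signature shown is the one in recent client-go releases, which may differ from the version pinned by this repo.

```go
// Sketch: fetch a step's logs through the Kubernetes logs API, since they can
// no longer be read from the pod's emptyDir after the pod finishes.
package main

import (
	"context"
	"fmt"
	"io"
	"os"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func dumpStepLogs(ctx context.Context, kc kubernetes.Interface, namespace, pod, container string) error {
	// GetLogs returns a request against the pod's log subresource.
	req := kc.CoreV1().Pods(namespace).GetLogs(pod, &corev1.PodLogOptions{Container: container})
	stream, err := req.Stream(ctx)
	if err != nil {
		return fmt.Errorf("failed to open log stream for %s/%s: %v", pod, container, err)
	}
	defer stream.Close()
	_, err = io.Copy(os.Stdout, stream)
	return err
}

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
	if err != nil {
		panic(err)
	}
	kc := kubernetes.NewForConfigOrDie(cfg)
	// "example-taskrun-pod" and "build-step-echo" are illustrative names only.
	if err := dumpStepLogs(context.Background(), kc, "default", "example-taskrun-pod", "build-step-echo"); err != nil {
		panic(err)
	}
}
```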

pkg/reconciler/v1alpha1/taskrun/taskrun.go

+7 -53

@@ -39,11 +39,9 @@ import (
     corev1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/equality"
     "k8s.io/apimachinery/pkg/api/errors"
-    "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime/schema"
     coreinformers "k8s.io/client-go/informers/core/v1"
-    "k8s.io/client-go/kubernetes"
     "k8s.io/client-go/tools/cache"
 )

@@ -71,8 +69,6 @@ const (
     taskRunAgentName = "taskrun-controller"
     // taskRunControllerName defines name for TaskRun Controller
     taskRunControllerName = "TaskRun"
-
-    pvcSizeBytes = 5 * 1024 * 1024 * 1024 // 5 GBs
 )

@@ -276,19 +272,8 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun) error
             return err
         }
     } else {
-        pvc, err := c.KubeClientSet.CoreV1().PersistentVolumeClaims(tr.Namespace).Get(tr.Name, metav1.GetOptions{})
-        if errors.IsNotFound(err) {
-            // Create a persistent volume claim to hold Build logs
-            pvc, err = createPVC(c.KubeClientSet, tr)
-            if err != nil {
-                return fmt.Errorf("Failed to create persistent volume claim %s for task %q: %v", tr.Name, err, tr.Name)
-            }
-        } else if err != nil {
-            c.Logger.Errorf("Failed to reconcile taskrun: %q, failed to get pvc %q: %v", tr.Name, tr.Name, err)
-            return err
-        }
         // Build pod is not present, create build pod.
-        pod, err = c.createBuildPod(ctx, tr, rtr.TaskSpec, rtr.TaskName, pvc.Name)
+        pod, err = c.createBuildPod(ctx, tr, rtr.TaskSpec, rtr.TaskName)
         if err != nil {
             // This Run has failed, so we need to mark it as failed and stop reconciling it
             var msg string

@@ -368,40 +353,9 @@ func (c *Reconciler) updateStatus(taskrun *v1alpha1.TaskRun) (*v1alpha1.TaskRun,
     return newtaskrun, nil
 }

-// createPVC will create a persistent volume mount for tr which
-// will be used to gather logs using the entrypoint wrapper
-func createPVC(kc kubernetes.Interface, tr *v1alpha1.TaskRun) (*corev1.PersistentVolumeClaim, error) {
-    v, err := kc.CoreV1().PersistentVolumeClaims(tr.Namespace).Create(
-        &corev1.PersistentVolumeClaim{
-            ObjectMeta: metav1.ObjectMeta{
-                Namespace: tr.Namespace,
-                // This pvc is specific to this TaskRun, so we'll use the same name
-                Name: tr.Name,
-                OwnerReferences: []metav1.OwnerReference{
-                    *metav1.NewControllerRef(tr, groupVersionKind),
-                },
-            },
-            Spec: corev1.PersistentVolumeClaimSpec{
-                AccessModes: []corev1.PersistentVolumeAccessMode{
-                    corev1.ReadWriteOnce,
-                },
-                Resources: corev1.ResourceRequirements{
-                    Requests: map[corev1.ResourceName]resource.Quantity{
-                        corev1.ResourceStorage: *resource.NewQuantity(pvcSizeBytes, resource.BinarySI),
-                    },
-                },
-            },
-        },
-    )
-    if err != nil {
-        return nil, fmt.Errorf("failed to claim Persistent Volume %q due to error: %s", tr.Name, err)
-    }
-    return v, nil
-}
-
 // createPod creates a Pod based on the Task's configuration, with pvcName as a
 // volumeMount
-func (c *Reconciler) createBuildPod(ctx context.Context, tr *v1alpha1.TaskRun, ts *v1alpha1.TaskSpec, taskName, pvcName string) (*corev1.Pod, error) {
+func (c *Reconciler) createBuildPod(ctx context.Context, tr *v1alpha1.TaskRun, ts *v1alpha1.TaskSpec, taskName string) (*corev1.Pod, error) {
     // TODO: Preferably use Validate on task.spec to catch validation error
     bs := ts.GetBuildSpec()
     if bs == nil {

@@ -434,7 +388,7 @@ func (c *Reconciler) createBuildPod(ctx context.Context, tr *v1alpha1.TaskRun, t
         }
     }

-    build, err := createRedirectedBuild(ctx, bSpec, pvcName, tr)
+    build, err := createRedirectedBuild(ctx, bSpec, tr)
     if err != nil {
         return nil, fmt.Errorf("couldn't create redirected Build: %v", err)
     }

@@ -488,7 +442,7 @@ func (c *Reconciler) createBuildPod(ctx context.Context, tr *v1alpha1.TaskRun, t
 // an entrypoint cache creates a build where all entrypoints are switched to
 // be the entrypoint redirector binary. This function assumes that it receives
 // its own copy of the BuildSpec and modifies it freely
-func createRedirectedBuild(ctx context.Context, bs *buildv1alpha1.BuildSpec, pvcName string, tr *v1alpha1.TaskRun) (*buildv1alpha1.Build, error) {
+func createRedirectedBuild(ctx context.Context, bs *buildv1alpha1.BuildSpec, tr *v1alpha1.TaskRun) (*buildv1alpha1.Build, error) {
     // Pass service account name from taskrun to build
     bs.ServiceAccountName = tr.Spec.ServiceAccount

@@ -519,9 +473,9 @@ func createRedirectedBuild(ctx context.Context, bs *buildv1alpha1.BuildSpec, pvc
     b.Spec.Volumes = append(b.Spec.Volumes, corev1.Volume{
         Name: entrypoint.MountName,
         VolumeSource: corev1.VolumeSource{
-            PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
-                ClaimName: pvcName,
-            },
+            // TODO(#107) we need to actually stream these logs somewhere, probably via sidecar.
+            // Currently these logs will be lost when the pod is unscheduled.
+            EmptyDir: &corev1.EmptyDirVolumeSource{},
         },
     })
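
For orientation only: the context comments above say that `createRedirectedBuild` switches every step's entrypoint to the redirector binary. The sketch below illustrates that idea under stated assumptions; the `/tools/entrypoint` path, the `--cmd` flag, and the `redirectStep` helper are invented for illustration and are not the project's actual `entrypoint` package API.

```go
// Hypothetical illustration: rewrite a step container so the redirector binary
// runs first and then executes the original command. Paths and flags invented.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

func redirectStep(step corev1.Container) corev1.Container {
	original := append(step.Command, step.Args...)
	// Run the wrapper binary from the shared tools volume instead of the step's command.
	step.Command = []string{"/tools/entrypoint"}
	// Pass the original command through to the wrapper (flag name invented).
	step.Args = append([]string{"--cmd"}, original...)
	step.VolumeMounts = append(step.VolumeMounts, corev1.VolumeMount{
		Name:      "tools", // matches the emptyDir volume added to the Build spec above
		MountPath: "/tools",
	})
	return step
}

func main() {
	s := corev1.Container{Name: "build-step-echo", Image: "ubuntu", Command: []string{"echo"}, Args: []string{"hello world"}}
	fmt.Printf("%+v\n", redirectStep(s))
}
```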

pkg/reconciler/v1alpha1/taskrun/taskrun_test.go

+2 -49

@@ -35,7 +35,6 @@ import (
     "go.uber.org/zap"
     "go.uber.org/zap/zaptest/observer"
     corev1 "k8s.io/api/core/v1"
-    "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime"
     fakekubeclientset "k8s.io/client-go/kubernetes/fake"

@@ -108,36 +107,11 @@
     ))
 )

-func getExpectedPVC(tr *v1alpha1.TaskRun) *corev1.PersistentVolumeClaim {
-    return &corev1.PersistentVolumeClaim{
-        ObjectMeta: metav1.ObjectMeta{
-            Namespace: tr.Namespace,
-            // This pvc is specific to this TaskRun, so we'll use the same name
-            Name: tr.Name,
-            OwnerReferences: []metav1.OwnerReference{
-                *metav1.NewControllerRef(tr, groupVersionKind),
-            },
-        },
-        Spec: corev1.PersistentVolumeClaimSpec{
-            AccessModes: []corev1.PersistentVolumeAccessMode{
-                corev1.ReadWriteOnce,
-            },
-            Resources: corev1.ResourceRequirements{
-                Requests: map[corev1.ResourceName]resource.Quantity{
-                    corev1.ResourceStorage: *resource.NewQuantity(pvcSizeBytes, resource.BinarySI),
-                },
-            },
-        },
-    }
-}
-
 func getToolsVolume(claimName string) corev1.Volume {
     return corev1.Volume{
         Name: toolsMountName,
         VolumeSource: corev1.VolumeSource{
-            PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
-                ClaimName: claimName,
-            },
+            EmptyDir: &corev1.EmptyDirVolumeSource{},
         },
     }
 }

@@ -422,27 +396,6 @@ func TestReconcile(t *testing.T) {
         if len(clients.Kube.Actions()) == 0 {
             t.Fatalf("Expected actions to be logged in the kubeclient, got none")
         }
-
-        pvc, err := clients.Kube.CoreV1().PersistentVolumeClaims(namespace).Get(name, metav1.GetOptions{})
-        if err != nil {
-            t.Errorf("Failed to fetch build: %v", err)
-        }
-
-        expectedVolume := getExpectedPVC(tr)
-        if d := cmp.Diff(pvc.Name, expectedVolume.Name); d != "" {
-            t.Errorf("pvc doesn't match, diff: %s", d)
-        }
-        if d := cmp.Diff(pvc.OwnerReferences, expectedVolume.OwnerReferences); d != "" {
-            t.Errorf("pvc doesn't match, diff: %s", d)
-        }
-        if d := cmp.Diff(pvc.Spec.AccessModes, expectedVolume.Spec.AccessModes); d != "" {
-            t.Errorf("pvc doesn't match, diff: %s", d)
-        }
-        if pvc.Spec.Resources.Requests["storage"] != expectedVolume.Spec.Resources.Requests["storage"] {
-            t.Errorf("pvc doesn't match, got: %v, expected: %v",
-                pvc.Spec.Resources.Requests["storage"],
-                expectedVolume.Spec.Resources.Requests["storage"])
-        }
     })
 }
 }

@@ -787,7 +740,7 @@ func TestCreateRedirectedBuild(t *testing.T) {
     expectedSteps := len(bs.Steps) + 1
     expectedVolumes := len(bs.Volumes) + 1

-    b, err := createRedirectedBuild(ctx, &bs, "pvc", tr)
+    b, err := createRedirectedBuild(ctx, &bs, tr)
     if err != nil {
         t.Errorf("expected createRedirectedBuild to pass: %v", err)
     }

test/cancel_test.go

+4 -4

@@ -45,7 +45,7 @@ func TestTaskRunPipelineRunCancel(t *testing.T) {
         tb.Step("foo", "ubuntu", tb.Command("/bin/bash"), tb.Args("-c", "sleep 500")),
     ))
     if _, err := c.TaskClient.Create(task); err != nil {
-        t.Fatalf("Failed to create Task `%s`: %s", hwTaskName, err)
+        t.Fatalf("Failed to create Task `banana`: %s", err)
     }

     pipeline := tb.Pipeline("tomatoes", namespace,

@@ -64,7 +64,7 @@ func TestTaskRunPipelineRunCancel(t *testing.T) {
         c := pr.Status.GetCondition(duckv1alpha1.ConditionSucceeded)
         if c != nil {
             if c.Status == corev1.ConditionTrue || c.Status == corev1.ConditionFalse {
-                return true, fmt.Errorf("pipelineRun %s already finished!", "pear")
+                return true, fmt.Errorf("pipelineRun %s already finished", "pear")
             } else if c.Status == corev1.ConditionUnknown && (c.Reason == "Running" || c.Reason == "Pending") {
                 return true, nil
             }

@@ -114,10 +114,10 @@ func TestTaskRunPipelineRunCancel(t *testing.T) {
         }
         return false, nil
     }, "PipelineRunCancelled"); err != nil {
-        t.Errorf("Error waiting for TaskRun %s to finish: %s", hwTaskRunName, err)
+        t.Errorf("Error waiting for PipelineRun `pear` to finish: %s", err)
     }

-    logger.Infof("Waiting for TaskRun %s in namespace %s to be cancelled", hwTaskRunName, namespace)
+    logger.Infof("Waiting for TaskRun `pear-foo` in namespace %s to be cancelled", namespace)
     if err := WaitForTaskRunState(c, "pear-foo", func(tr *v1alpha1.TaskRun) (bool, error) {
         c := tr.Status.GetCondition(duckv1alpha1.ConditionSucceeded)
         if c != nil {

test/crd.go

-149
This file was deleted.
