From 025bee2f9e746a256d8e643bddadfcee65f9fffd Mon Sep 17 00:00:00 2001 From: Tomas Turek Date: Mon, 17 Mar 2025 18:05:36 +0100 Subject: [PATCH] fix: Instability of cleanup process of old configmaps and secrets CTlog, Rekor and Fulcio have a problem when a newly created object is deleted during the cleanup process of old objects. --- internal/controller/common/action/result.go | 30 ++ .../controller/common/action/result_test.go | 264 ++++++++++++++++++ .../controller/ctlog/actions/server_config.go | 41 ++- .../fulcio/actions/server_config.go | 50 ++-- .../rekor/actions/server/sharding_config.go | 48 ++-- internal/testing/action/result.go | 14 - test/e2e/support/common.go | 6 + 7 files changed, 392 insertions(+), 61 deletions(-) create mode 100644 internal/controller/common/action/result.go create mode 100644 internal/controller/common/action/result_test.go diff --git a/internal/controller/common/action/result.go b/internal/controller/common/action/result.go new file mode 100644 index 000000000..8f13fd6d9 --- /dev/null +++ b/internal/controller/common/action/result.go @@ -0,0 +1,30 @@ +package action + +func IsSuccess(result *Result) bool { + return IsContinue(result) || IsReturn(result) +} + +func IsContinue(result *Result) bool { + return result == nil +} + +func IsReturn(result *Result) bool { + if result == nil { + return false + } + return !IsError(result) && !IsRequeue(result) +} + +func IsError(result *Result) bool { + if result == nil { + return false + } + return result.Err != nil +} + +func IsRequeue(result *Result) bool { + if result == nil { + return false + } + return result.Result.Requeue || result.Result.RequeueAfter > 0 +} diff --git a/internal/controller/common/action/result_test.go b/internal/controller/common/action/result_test.go new file mode 100644 index 000000000..96edbddd3 --- /dev/null +++ b/internal/controller/common/action/result_test.go @@ -0,0 +1,264 @@ +package action + +import ( + "errors" + "testing" + "time" + + 
"sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +func TestIsContinue(t *testing.T) { + tests := []struct { + name string + arg *Result + want bool + }{ + { + name: "continue", + want: true, + arg: nil, + }, + { + name: "return", + want: false, + arg: &Result{}, + }, + { + name: "error", + want: false, + arg: &Result{Err: errors.New("error")}, + }, + { + name: "terminal error", + want: false, + arg: &Result{Err: reconcile.TerminalError(errors.New("error"))}, + }, + { + name: "requeue", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}}, + }, + { + name: "requeue after", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: false, RequeueAfter: 5 * time.Second}}, + }, + { + name: "requeue error", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}, Err: errors.New("error")}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsContinue(tt.arg); got != tt.want { + t.Errorf("IsContinue() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsError(t *testing.T) { + tests := []struct { + name string + arg *Result + want bool + }{ + { + name: "continue", + want: false, + arg: nil, + }, + { + name: "return", + want: false, + arg: &Result{}, + }, + { + name: "error", + want: true, + arg: &Result{Err: errors.New("error")}, + }, + { + name: "terminal error", + want: true, + arg: &Result{Err: reconcile.TerminalError(errors.New("error"))}, + }, + { + name: "requeue", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}}, + }, + { + name: "requeue after", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: false, RequeueAfter: 5 * time.Second}}, + }, + { + name: "requeue error", + want: true, + arg: &Result{Result: reconcile.Result{Requeue: true}, Err: errors.New("error")}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsError(tt.arg); got != tt.want { + t.Errorf("IsError() = %v, want %v", got, tt.want) + } + }) + } 
+} + +func TestIsRequeue(t *testing.T) { + tests := []struct { + name string + arg *Result + want bool + }{ + { + name: "continue", + want: false, + arg: nil, + }, + { + name: "return", + want: false, + arg: &Result{}, + }, + { + name: "error", + want: false, + arg: &Result{Err: errors.New("error")}, + }, + { + name: "terminal error", + want: false, + arg: &Result{Err: reconcile.TerminalError(errors.New("error"))}, + }, + { + name: "requeue", + want: true, + arg: &Result{Result: reconcile.Result{Requeue: true}}, + }, + { + name: "requeue after", + want: true, + arg: &Result{Result: reconcile.Result{Requeue: false, RequeueAfter: 5 * time.Second}}, + }, + { + name: "requeue error", + want: true, + arg: &Result{Result: reconcile.Result{Requeue: true}, Err: errors.New("error")}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsRequeue(tt.arg); got != tt.want { + t.Errorf("IsRequeue() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsReturn(t *testing.T) { + tests := []struct { + name string + arg *Result + want bool + }{ + { + name: "continue", + want: false, + arg: nil, + }, + { + name: "return", + want: true, + arg: &Result{}, + }, + { + name: "error", + want: false, + arg: &Result{Err: errors.New("error")}, + }, + { + name: "terminal error", + want: false, + arg: &Result{Err: reconcile.TerminalError(errors.New("error"))}, + }, + { + name: "requeue", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}}, + }, + { + name: "requeue after", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: false, RequeueAfter: 5 * time.Second}}, + }, + { + name: "requeue error", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}, Err: errors.New("error")}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsReturn(tt.arg); got != tt.want { + t.Errorf("IsReturn() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsSuccess(t *testing.T) { + 
tests := []struct { + name string + arg *Result + want bool + }{ + { + name: "continue", + want: true, + arg: nil, + }, + { + name: "return", + want: true, + arg: &Result{}, + }, + { + name: "error", + want: false, + arg: &Result{Err: errors.New("error")}, + }, + { + name: "terminal error", + want: false, + arg: &Result{Err: reconcile.TerminalError(errors.New("error"))}, + }, + { + name: "requeue", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}}, + }, + { + name: "requeue after", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: false, RequeueAfter: 5 * time.Second}}, + }, + { + name: "requeue error", + want: false, + arg: &Result{Result: reconcile.Result{Requeue: true}, Err: errors.New("error")}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsSuccess(tt.arg); got != tt.want { + t.Errorf("IsSuccess() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/controller/ctlog/actions/server_config.go b/internal/controller/ctlog/actions/server_config.go index 5bf60dfd7..e693a9ff3 100644 --- a/internal/controller/ctlog/actions/server_config.go +++ b/internal/controller/ctlog/actions/server_config.go @@ -2,6 +2,7 @@ package actions import ( "context" + "errors" "fmt" rhtasv1alpha1 "github.com/securesign/operator/api/v1alpha1" @@ -147,13 +148,37 @@ func (i serverConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.CTlog) }) } - // try to discover existing config and clear them out + instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} + + i.Recorder.Eventf(instance, corev1.EventTypeNormal, "CTLogConfigCreated", "Secret with ctlog configuration created: %s", newConfig.Name) + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: ConfigCondition, + Status: metav1.ConditionTrue, + Reason: constants.Ready, + Message: "Server config created", + ObservedGeneration: instance.Generation, + }) + result := 
i.StatusUpdate(ctx, instance) + if action.IsSuccess(result) { + i.cleanup(ctx, instance, configLabels) + } + return result +} + +func (i serverConfig) cleanup(ctx context.Context, instance *rhtasv1alpha1.CTlog, configLabels map[string]string) { + if instance.Status.ServerConfigRef == nil || instance.Status.ServerConfigRef.Name == "" { + i.Logger.Error(errors.New("new Secret name is empty"), "unable to clean old objects", "namespace", instance.Namespace) + return + } + + // try to discover existing secrets and clear them out partialConfigs, err := utils.ListSecrets(ctx, i.Client, instance.Namespace, labels2.SelectorFromSet(configLabels).String()) if err != nil { i.Logger.Error(err, "problem with listing configmaps", "namespace", instance.Namespace) + return } for _, partialConfig := range partialConfigs.Items { - if partialConfig.Name == newConfig.Name { + if partialConfig.Name == instance.Status.ServerConfigRef.Name { continue } @@ -166,18 +191,6 @@ func (i serverConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.CTlog) i.Logger.Info("Remove invalid Secret with ctlog configuration", "Name", partialConfig.Name) i.Recorder.Eventf(instance, corev1.EventTypeNormal, "CTLogConfigDeleted", "Secret with ctlog configuration deleted: %s", partialConfig.Name) } - - instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} - - i.Recorder.Eventf(instance, corev1.EventTypeNormal, "CTLogConfigCreated", "Secret with ctlog configuration created: %s", newConfig.Name) - meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ - Type: ConfigCondition, - Status: metav1.ConditionTrue, - Reason: constants.Ready, - Message: "Server config created", - ObservedGeneration: instance.Generation, - }) - return i.StatusUpdate(ctx, instance) } func (i serverConfig) handlePrivateKey(instance *rhtasv1alpha1.CTlog) (*ctlogUtils.KeyConfig, error) { diff --git a/internal/controller/fulcio/actions/server_config.go 
b/internal/controller/fulcio/actions/server_config.go index a36057afc..a5dcfbfcf 100644 --- a/internal/controller/fulcio/actions/server_config.go +++ b/internal/controller/fulcio/actions/server_config.go @@ -2,6 +2,7 @@ package actions import ( "context" + "errors" "fmt" "reflect" @@ -115,6 +116,7 @@ func (i serverConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Fulcio Namespace: instance.Namespace, }, } + if _, err = kubernetes.CreateOrUpdate(ctx, i.Client, newConfig, ensure.ControllerReference[*v1.ConfigMap](instance, i.Client), @@ -129,13 +131,38 @@ func (i serverConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Fulcio return i.Error(ctx, fmt.Errorf("could not create Server config: %w", err), instance) } + i.Recorder.Eventf(instance, v1.EventTypeNormal, "FulcioConfigUpdated", "Fulcio config updated: %s", newConfig.Name) + instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} + + meta.SetStatusCondition(&instance.Status.Conditions, + metav1.Condition{ + Type: constants.Ready, + Status: metav1.ConditionFalse, + Reason: constants.Creating, + Message: "Server config created"}, + ) + + result := i.StatusUpdate(ctx, instance) + if action.IsSuccess(result) { + i.cleanup(ctx, instance, configLabel) + } + return result +} + +func (i serverConfig) cleanup(ctx context.Context, instance *rhtasv1alpha1.Fulcio, configLabels map[string]string) { + if instance.Status.ServerConfigRef == nil || instance.Status.ServerConfigRef.Name == "" { + i.Logger.Error(errors.New("new ConfigMap name is empty"), "unable to clean old objects", "namespace", instance.Namespace) + return + } + // remove old server configmaps - partialConfigs, err := kubernetes.ListConfigMaps(ctx, i.Client, instance.Namespace, labels2.SelectorFromSet(configLabel).String()) + partialConfigs, err := kubernetes.ListConfigMaps(ctx, i.Client, instance.Namespace, labels2.SelectorFromSet(configLabels).String()) if err != nil { i.Logger.Error(err, "problem with 
finding configmap") + return } for _, partialConfig := range partialConfigs.Items { - if partialConfig.Name == newConfig.Name { + if partialConfig.Name == instance.Status.ServerConfigRef.Name { continue } @@ -147,21 +174,10 @@ func (i serverConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Fulcio }) if err != nil { i.Logger.Error(err, "problem with deleting configmap", "name", partialConfig.Name) - } else { - i.Logger.Info("Remove invalid ConfigMap with rekor-server configuration", "name", partialConfig.Name) - i.Recorder.Eventf(instance, v1.EventTypeNormal, "FulcioConfigDeleted", "Fulcio config deleted: %s", partialConfig.Name) + i.Recorder.Eventf(instance, v1.EventTypeWarning, "FulcioConfigDeleted", "Unable to delete secret: %s", partialConfig.Name) + continue } + i.Logger.Info("Remove invalid ConfigMap with Fulcio configuration", "name", partialConfig.Name) + i.Recorder.Eventf(instance, v1.EventTypeNormal, "FulcioConfigDeleted", "Fulcio config deleted: %s", partialConfig.Name) } - - i.Recorder.Eventf(instance, v1.EventTypeNormal, "FulcioConfigUpdated", "Fulcio config updated: %s", newConfig.Name) - instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} - - meta.SetStatusCondition(&instance.Status.Conditions, - metav1.Condition{ - Type: constants.Ready, - Status: metav1.ConditionFalse, - Reason: constants.Creating, - Message: "Server config created"}, - ) - return i.StatusUpdate(ctx, instance) } diff --git a/internal/controller/rekor/actions/server/sharding_config.go b/internal/controller/rekor/actions/server/sharding_config.go index 81c894b4b..3fade8b91 100644 --- a/internal/controller/rekor/actions/server/sharding_config.go +++ b/internal/controller/rekor/actions/server/sharding_config.go @@ -2,6 +2,7 @@ package server import ( "context" + "errors" "fmt" "reflect" @@ -88,6 +89,7 @@ func (i shardingConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Reko Namespace: instance.Namespace, }, } + if _, err = 
kubernetes.CreateOrUpdate(ctx, i.Client, newConfig, ensure.ControllerReference[*v1.ConfigMap](instance, i.Client), @@ -97,13 +99,37 @@ func (i shardingConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Reko return i.Error(ctx, fmt.Errorf("could not create sharding config: %w", err), instance) } + i.Recorder.Eventf(instance, v1.EventTypeNormal, "ShardingConfigCreated", "ConfigMap with sharding configuration created: %s", newConfig.Name) + instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} + + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: actions.ServerCondition, + Status: metav1.ConditionFalse, + Reason: constants.Creating, + Message: "Sharding config created", + }) + + result := i.StatusUpdate(ctx, instance) + if action.IsSuccess(result) { + i.cleanup(ctx, instance, labels) + } + return result +} + +func (i shardingConfig) cleanup(ctx context.Context, instance *rhtasv1alpha1.Rekor, configLabels map[string]string) { + if instance.Status.ServerConfigRef == nil || instance.Status.ServerConfigRef.Name == "" { + i.Logger.Error(errors.New("new ConfigMap name is empty"), "unable to clean old objects", "namespace", instance.Namespace) + return + } + // remove old server configmaps - partialConfigs, err := kubernetes.ListConfigMaps(ctx, i.Client, instance.Namespace, labels2.SelectorFromSet(labels).String()) + partialConfigs, err := kubernetes.ListConfigMaps(ctx, i.Client, instance.Namespace, labels2.SelectorFromSet(configLabels).String()) if err != nil { i.Logger.Error(err, "problem with finding configmap") + return } for _, partialConfig := range partialConfigs.Items { - if partialConfig.Name == newConfig.Name { + if partialConfig.Name == instance.Status.ServerConfigRef.Name { continue } @@ -115,22 +141,12 @@ func (i shardingConfig) Handle(ctx context.Context, instance *rhtasv1alpha1.Reko }) if err != nil { i.Logger.Error(err, "problem with deleting configmap", "name", partialConfig.Name) - 
} else { - i.Logger.Info("Remove invalid ConfigMap with rekor-sharding configuration", "name", partialConfig.Name) - i.Recorder.Eventf(instance, v1.EventTypeNormal, "ShardingConfigDeleted", "ConfigMap with sharding configuration deleted: %s", partialConfig.Name) + i.Recorder.Eventf(instance, v1.EventTypeWarning, "ShardingConfigDeleted", "Unable to delete secret: %s", partialConfig.Name) + continue } + i.Logger.Info("Remove invalid ConfigMap with rekor-sharding configuration", "name", partialConfig.Name) + i.Recorder.Eventf(instance, v1.EventTypeNormal, "ShardingConfigDeleted", "ConfigMap with sharding configuration deleted: %s", partialConfig.Name) } - - i.Recorder.Eventf(instance, v1.EventTypeNormal, "ShardingConfigCreated", "ConfigMap with sharding configuration created: %s", newConfig.Name) - instance.Status.ServerConfigRef = &rhtasv1alpha1.LocalObjectReference{Name: newConfig.Name} - - meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ - Type: actions.ServerCondition, - Status: metav1.ConditionFalse, - Reason: constants.Creating, - Message: "Sharding config created", - }) - return i.StatusUpdate(ctx, instance) } func createShardingConfigData(sharding []rhtasv1alpha1.RekorLogRange) (map[string]string, error) { diff --git a/internal/testing/action/result.go b/internal/testing/action/result.go index 216a86f2c..ce2e51623 100644 --- a/internal/testing/action/result.go +++ b/internal/testing/action/result.go @@ -35,17 +35,3 @@ func Requeue() *action.Result { Err: nil, } } - -func IsFailed(result *action.Result) bool { - if result == nil { - return false - } - return result.Err != nil -} - -func IsRequeue(result *action.Result) bool { - if result == nil { - return false - } - return result.Result.Requeue || result.Result.RequeueAfter > 0 -} diff --git a/test/e2e/support/common.go b/test/e2e/support/common.go index d236026c8..9c5f09f6f 100644 --- a/test/e2e/support/common.go +++ b/test/e2e/support/common.go @@ -16,6 +16,7 @@ import ( monitoringv1 
"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" v12 "k8s.io/api/apps/v1" v13 "k8s.io/api/batch/v1" + "k8s.io/apimachinery/pkg/runtime/schema" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -124,6 +125,10 @@ func DumpNamespace(ctx context.Context, cli client.Client, ns string) { // Example usage with mock data k8s := map[string]logTarget{} + secretList := &metav1.PartialObjectMetadataList{} + gvk := schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Secret"} + secretList.SetGroupVersionKind(gvk) + toDump := map[string]client.ObjectList{ "securesign.yaml": &v1alpha1.SecuresignList{}, "fulcio.yaml": &v1alpha1.FulcioList{}, @@ -138,6 +143,7 @@ func DumpNamespace(ctx context.Context, cli client.Client, ns string) { "job.yaml": &v13.JobList{}, "cronjob.yaml": &v13.CronJobList{}, "event.yaml": &v1.EventList{}, + "secret.yaml": secretList, } core.GinkgoWriter.Println("----------------------- Dumping namespace " + ns + " -----------------------")