Просмотр исходного кода

fix: prevent feedback loop

Signed-off-by: Moritz Johner <beller.moritz@googlemail.com>
Moritz Johner 3 месяцев назад
Родитель
Сommit
1d10be17f3

+ 70 - 7
pkg/controllers/externalsecret/externalsecret_controller.go

@@ -47,6 +47,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/event"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -119,6 +120,9 @@ const (
 	eventDeletedOrphaned          = "secret deleted because it was orphaned"
 	eventMissingProviderSecret    = "secret does not exist at provider using spec.dataFrom[%d]"
 	eventMissingProviderSecretKey = "secret does not exist at provider using spec.dataFrom[%d] (key=%s)"
+
+	// cacheSyncRetryDelay is used when partial and full secret caches are temporarily out of sync.
+	cacheSyncRetryDelay = 200 * time.Millisecond
 )
 
 // these errors are explicitly defined so we can detect them with `errors.Is()`.
@@ -138,6 +142,7 @@ const (
 type Reconciler struct {
 	client.Client
 	SecretClient              client.Client
+	APIReader                 client.Reader
 	Log                       logr.Logger
 	Scheme                    *runtime.Scheme
 	RestConfig                *rest.Config
@@ -334,13 +339,28 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
 
 	// ensure the full cache is up-to-date
 	// NOTE: this prevents race conditions between the partial and full cache.
-	//       we return an error so we get an exponential backoff if we end up looping,
-	//       for example, during high cluster load and frequent updates to the target secret by other controllers.
+	//       we verify against the API server before retrying, to avoid aggressive error retries
+	//       when the cache is temporarily stale.
 	if secretPartial.UID != existingSecret.UID || secretPartial.ResourceVersion != existingSecret.ResourceVersion {
-		err = fmt.Errorf(errSecretCachesNotSynced, secretName)
-		log.Error(err, logErrorSecretCacheNotSynced, "secretName", secretName, "secretNamespace", externalSecret.Namespace)
-		syncCallsError.With(resourceLabels).Inc()
-		return ctrl.Result{}, err
+		authoritativeSecret := &v1.Secret{}
+		secretReader := r.APIReader
+		if secretReader == nil {
+			secretReader = r.SecretClient
+		}
+		getErr := secretReader.Get(ctx, client.ObjectKey{Name: secretName, Namespace: externalSecret.Namespace}, authoritativeSecret)
+		if getErr != nil && !apierrors.IsNotFound(getErr) {
+			log.Error(getErr, logErrorGetSecret, "secretName", secretName, "secretNamespace", externalSecret.Namespace)
+			syncCallsError.With(resourceLabels).Inc()
+			return ctrl.Result{}, getErr
+		}
+
+		if secretPartial.UID != authoritativeSecret.UID || secretPartial.ResourceVersion != authoritativeSecret.ResourceVersion {
+			log.V(1).Info(logErrorSecretCacheNotSynced, "secretName", secretName, "secretNamespace", externalSecret.Namespace)
+			return ctrl.Result{RequeueAfter: cacheSyncRetryDelay}, nil
+		}
+
+		// use the authoritative view so we can continue reconciliation even if the full cache is stale.
+		existingSecret = authoritativeSecret
 	}
 
 	// refresh will be skipped if ALL the following conditions are met:
@@ -1220,6 +1240,9 @@ func genericTargetContentHash(obj *unstructured.Unstructured) (string, error) {
 // SetupWithManager returns a new controller builder that will be started by the provided Manager.
 func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opts controller.Options) error {
 	r.recorder = mgr.GetEventRecorderFor("external-secrets")
+	if r.APIReader == nil {
+		r.APIReader = mgr.GetAPIReader()
+	}
 	// Initialize informer manager only if generic targets are allowed
 	if r.AllowGenericTargets && r.informerManager == nil {
 		r.informerManager = NewInformerManager(ctx, mgr.GetCache(), r.Client, r.Log.WithName("informer-manager"))
@@ -1265,10 +1288,17 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
 		return hasLabel && value == esv1.LabelManagedValue
 	})
 
+	// filter ExternalSecret updates to avoid requeueing on status-only changes.
+	externalSecretPredicate := predicate.Funcs{
+		UpdateFunc: func(e event.UpdateEvent) bool {
+			return shouldEnqueueExternalSecretUpdate(e.ObjectOld, e.ObjectNew)
+		},
+	}
+
 	// Build the controller
 	builder := ctrl.NewControllerManagedBy(mgr).
 		WithOptions(opts).
-		For(&esv1.ExternalSecret{}).
+		For(&esv1.ExternalSecret{}, builder.WithPredicates(externalSecretPredicate)).
 		// we cant use Owns(), as we don't set ownerReferences when the creationPolicy is not Owner.
 		// we use WatchesMetadata() to reduce memory usage, as otherwise we have to process full secret objects.
 		WatchesMetadata(
@@ -1286,6 +1316,39 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
 	return builder.Complete(r)
 }
 
+// shouldEnqueueExternalSecretUpdate returns true for spec/metadata updates that can affect reconciliation behavior,
+// while ignoring status-only updates.
+func shouldEnqueueExternalSecretUpdate(oldObj, newObj client.Object) bool {
+	oldES, oldOK := oldObj.(*esv1.ExternalSecret)
+	newES, newOK := newObj.(*esv1.ExternalSecret)
+	if !oldOK || !newOK {
+		return true
+	}
+
+	if oldES.GetGeneration() != newES.GetGeneration() {
+		return true
+	}
+
+	if !equality.Semantic.DeepEqual(oldES.GetLabels(), newES.GetLabels()) {
+		return true
+	}
+
+	if !equality.Semantic.DeepEqual(oldES.GetAnnotations(), newES.GetAnnotations()) {
+		return true
+	}
+
+	oldDeletion := oldES.GetDeletionTimestamp()
+	newDeletion := newES.GetDeletionTimestamp()
+	if oldDeletion == nil && newDeletion == nil {
+		return false
+	}
+	if oldDeletion == nil || newDeletion == nil {
+		return true
+	}
+
+	return !oldDeletion.Equal(newDeletion)
+}
+
 func (r *Reconciler) findObjectsForSecret(ctx context.Context, secret client.Object) []reconcile.Request {
 	externalSecretsList := &esv1.ExternalSecretList{}
 	listOps := &client.ListOptions{

+ 82 - 0
pkg/controllers/externalsecret/externalsecret_controller_test.go

@@ -2662,6 +2662,88 @@ var _ = Describe("ExternalSecret refresh logic", func() {
 	})
 })
 
+var _ = Describe("ExternalSecret update predicate", func() {
+	It("should ignore status-only updates", func() {
+		oldES := &esv1.ExternalSecret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:       "foo",
+				Namespace:  "default",
+				Generation: 1,
+			},
+			Status: esv1.ExternalSecretStatus{
+				RefreshTime: metav1.Now(),
+			},
+		}
+		newES := oldES.DeepCopy()
+		newES.Status.RefreshTime = metav1.NewTime(time.Now().Add(time.Minute))
+
+		Expect(shouldEnqueueExternalSecretUpdate(oldES, newES)).To(BeFalse())
+	})
+
+	It("should enqueue when generation changes", func() {
+		oldES := &esv1.ExternalSecret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:       "foo",
+				Namespace:  "default",
+				Generation: 1,
+			},
+		}
+		newES := oldES.DeepCopy()
+		newES.Generation = 2
+
+		Expect(shouldEnqueueExternalSecretUpdate(oldES, newES)).To(BeTrue())
+	})
+
+	It("should enqueue when labels change", func() {
+		oldES := &esv1.ExternalSecret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:       "foo",
+				Namespace:  "default",
+				Generation: 1,
+				Labels: map[string]string{
+					"app": "a",
+				},
+			},
+		}
+		newES := oldES.DeepCopy()
+		newES.Labels["app"] = "b"
+
+		Expect(shouldEnqueueExternalSecretUpdate(oldES, newES)).To(BeTrue())
+	})
+
+	It("should enqueue when annotations change", func() {
+		oldES := &esv1.ExternalSecret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:       "foo",
+				Namespace:  "default",
+				Generation: 1,
+				Annotations: map[string]string{
+					"note": "a",
+				},
+			},
+		}
+		newES := oldES.DeepCopy()
+		newES.Annotations["note"] = "b"
+
+		Expect(shouldEnqueueExternalSecretUpdate(oldES, newES)).To(BeTrue())
+	})
+
+	It("should enqueue when deletion timestamp changes", func() {
+		oldES := &esv1.ExternalSecret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:       "foo",
+				Namespace:  "default",
+				Generation: 1,
+			},
+		}
+		newES := oldES.DeepCopy()
+		now := metav1.Now()
+		newES.DeletionTimestamp = &now
+
+		Expect(shouldEnqueueExternalSecretUpdate(oldES, newES)).To(BeTrue())
+	})
+})
+
 var _ = Describe("ExternalSecret refresh policy", func() {
 	Context("RefreshPolicy=CreatedOnce", func() {
 		It("should refresh when SyncedResourceVersion is empty", func() {