Ver Fonte

feat(controller): log secret deletions and data-key changes (#6443)

* feat(controller): log secret deletions and data-key changes

ESO mutated and deleted Kubernetes Secrets with little log visibility,
which made it impossible to build alerting rules on destructive
operations. The existing logs only said operation=UPDATE with no
indication of which Secret or key was affected.

Add default-verbosity log lines that name the affected Secret, its
namespace, and the data keys involved, so operators can alert on
deletions and on values being emptied. Coverage:

- secret deletion when the provider returns no data (DeletionPolicy=Delete)
- managed-secret cleanup on ExternalSecret deletion (was V(1))
- orphaned-secret deletion
- a per-update data-key diff (added / updated / removed / emptied)

Only key names are logged, never secret values, per the maintainer
request not to log confidential data.

Fixes: external-secrets/external-secrets#2498
Signed-off-by: Alexander Chernov <alexander@chernov.it>

* fix(controller): move mutation logs to V(1) and guard diff behind Enabled check

Per reviewer feedback:
- All four mutation log lines (secret deleted, managed secret deleted,
  orphaned secret deleted, data keys changed) moved from log.Info to
  log.V(1).Info so they are suppressed at the default log level.
- diffSecretDataKeys is now called only inside a log.V(1).Enabled() guard,
  eliminating per-reconcile allocation when debug logging is off.
- Update docs/api/controller-options.md with a "Debug-level logging"
  section explaining how to enable the logs (--loglevel=debug / Helm
  log.level: debug) and what each message carries.

Fixes: external-secrets/external-secrets#2498
Signed-off-by: Alexander Chernov <alexander@chernov.it>

---------

Signed-off-by: Alexander Chernov <alexander@chernov.it>
Co-authored-by: Gergely Bräutigam <gergely.brautigam@sap.com>
Alexander Chernov há 1 semana atrás
pai
commit
40c4a41c78

+ 11 - 0
docs/api/controller-options.md

@@ -41,6 +41,17 @@ reconciler.
 | `--store-requeue-interval`                    | duration | 5m0s    | Default Time duration between reconciling (Cluster)SecretStores                                                                                                    |
 | `--enable-http2`                              | boolean  | false   | If set, HTTP/2 will be enabled for the metrics server                                                                                                              |
 
+### Debug-level logging
+
+Setting `--loglevel=debug` (Helm: `log.level: debug`) enables additional log lines that are suppressed at the default `info` level. These include:
+
+- **Secret deletion** -- logged when the controller deletes a managed Secret because the provider returned no data and `DeletionPolicy=Delete` is set. Fields: `secret`, `namespace`, `reason`.
+- **Managed secret cleanup** -- logged when a managed Secret is deleted because its owning ExternalSecret was deleted. Fields: `secret`, `namespace`, `reason`.
+- **Orphaned secret cleanup** -- logged when an orphaned Secret is deleted. Fields: `secret`, `namespace`.
+- **Secret data key diff** -- logged after every update where data keys actually changed (keys added, updated, removed, or emptied). Only key names are logged, never their values. Fields: `secret`, `namespace`, `added`, `updated`, `removed`, `emptied`.
+
+These messages can be used to build alerting rules on destructive operations. The diff computation is skipped entirely when debug logging is not enabled, so there is no performance impact at the default log level.
+
 ## Cert Controller Flags
 
 | Name                       | Type     | Default                  | Descripton                                                                                                            |

+ 33 - 7
pkg/controllers/externalsecret/externalsecret_controller.go

@@ -98,6 +98,13 @@ const (
 	logErrorSecretCacheNotSynced = "controller caches for Secret are not in sync"
 	logErrorUnmanagedStore       = "unable to determine if store is managed"
 
+	// log messages for mutating / destructive secret operations, emitted at
+	// V(1) so they are opt-in. They only ever carry key names, never values.
+	logSecretDeleted         = "deleted secret"
+	logManagedSecretDeleted  = "deleted managed secret"
+	logSecretDeletedOrphaned = "deleted orphaned secret"
+	logSecretDataChanged     = "secret data keys changed"
+
 	// error formats.
 	errConvert               = "error applying conversion strategy %s to keys: %w"
 	errRewrite               = "error applying rewrite to keys: %w"
@@ -434,6 +441,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
 					r.markAsFailed(msgErrorDeleteSecret, err, externalSecret, syncCallsError.With(resourceLabels), esv1.ConditionReasonSecretSyncedError)
 					return ctrl.Result{}, err
 				}
+				log.V(1).Info(logSecretDeleted, "secret", secretName, "namespace", externalSecret.Namespace, "reason", "DeletionPolicy=Delete and provider returned no data")
 				r.recorder.Event(externalSecret, v1.EventTypeNormal, esv1.ReasonDeleted, eventDeleted)
 			}
 
@@ -507,7 +515,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
 	case esv1.CreatePolicyMerge:
 		// update the secret, if it exists
 		if existingSecret.UID != "" {
-			err = r.updateSecret(ctx, existingSecret, mutationFunc, externalSecret, secretName)
+			err = r.updateSecret(ctx, log, existingSecret, mutationFunc, externalSecret, secretName)
 		} else {
 			// if the secret does not exist, we wait until the next refresh interval
 			// rather than returning an error which would requeue immediately
@@ -520,12 +528,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
 			err = r.createSecret(ctx, mutationFunc, externalSecret, secretName)
 		} else {
 			// if the secret exists, we should update it
-			err = r.updateSecret(ctx, existingSecret, mutationFunc, externalSecret, secretName)
+			err = r.updateSecret(ctx, log, existingSecret, mutationFunc, externalSecret, secretName)
 		}
 	case esv1.CreatePolicyOwner:
 		// we may have orphaned secrets to clean up,
 		// for example, if the target secret name was changed
-		err = r.deleteOrphanedSecrets(ctx, externalSecret, secretName)
+		err = r.deleteOrphanedSecrets(ctx, log, externalSecret, secretName)
 		if err != nil {
 			r.markAsFailed(msgErrorDeleteOrphaned, err, externalSecret, syncCallsError.With(resourceLabels), esv1.ConditionReasonSecretSyncedError)
 			return ctrl.Result{}, err
@@ -536,7 +544,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
 			err = r.createSecret(ctx, mutationFunc, externalSecret, secretName)
 		} else {
 			// if the secret exists, we should update it
-			err = r.updateSecret(ctx, existingSecret, mutationFunc, externalSecret, secretName)
+			err = r.updateSecret(ctx, log, existingSecret, mutationFunc, externalSecret, secretName)
 		}
 	}
 	if err != nil {
@@ -860,13 +868,13 @@ func (r *Reconciler) cleanupManagedSecrets(ctx context.Context, log logr.Logger,
 		if err := r.Delete(ctx, &secret); err != nil && !apierrors.IsNotFound(err) {
 			return err
 		}
-		log.V(1).Info("deleted managed secret", "secret", secretName)
+		log.V(1).Info(logManagedSecretDeleted, "secret", secretName, "namespace", externalSecret.Namespace, "reason", "ExternalSecret deleted")
 	}
 
 	return nil
 }
 
-func (r *Reconciler) deleteOrphanedSecrets(ctx context.Context, externalSecret *esv1.ExternalSecret, secretName string) error {
+func (r *Reconciler) deleteOrphanedSecrets(ctx context.Context, log logr.Logger, externalSecret *esv1.ExternalSecret, secretName string) error {
 	ownerLabel := esutils.ObjectHash(fmt.Sprintf("%v/%v", externalSecret.Namespace, externalSecret.Name))
 
 	// we use a PartialObjectMetadataList to avoid loading the full secret objects
@@ -890,6 +898,7 @@ func (r *Reconciler) deleteOrphanedSecrets(ctx context.Context, externalSecret *
 			if err != nil && !apierrors.IsNotFound(err) {
 				return err
 			}
+			log.V(1).Info(logSecretDeletedOrphaned, "secret", secretPartial.GetName(), "namespace", externalSecret.Namespace)
 			r.recorder.Event(externalSecret, v1.EventTypeNormal, esv1.ReasonDeleted, eventDeletedOrphaned)
 		}
 	}
@@ -928,7 +937,7 @@ func (r *Reconciler) createSecret(ctx context.Context, mutationFunc func(secret
 	return nil
 }
 
-func (r *Reconciler) updateSecret(ctx context.Context, existingSecret *v1.Secret, mutationFunc func(secret *v1.Secret) error, es *esv1.ExternalSecret, secretName string) error {
+func (r *Reconciler) updateSecret(ctx context.Context, log logr.Logger, existingSecret *v1.Secret, mutationFunc func(secret *v1.Secret) error, es *esv1.ExternalSecret, secretName string) error {
 	fqdn := fqdnFor(es.Name)
 
 	// fail if the secret does not exist
@@ -1000,6 +1009,23 @@ func (r *Reconciler) updateSecret(ctx context.Context, existingSecret *v1.Secret
 		return fmt.Errorf(errUpdate, updatedSecret.Name, err)
 	}
 
+	// only compute the key diff when debug verbosity is active (--loglevel=debug /
+	// log.level=debug in Helm). skipping it by default avoids per-reconcile
+	// allocation on every managed secret. we only ever log key names, never values.
+	if log.V(1).Enabled() {
+		added, updated, removed, emptied := diffSecretDataKeys(existingSecret.Data, updatedSecret.Data)
+		if len(added) > 0 || len(updated) > 0 || len(removed) > 0 {
+			log.V(1).Info(logSecretDataChanged,
+				"secret", secretName,
+				"namespace", es.Namespace,
+				"added", added,
+				"updated", updated,
+				"removed", removed,
+				"emptied", emptied,
+			)
+		}
+	}
+
 	r.recorder.Event(es, v1.EventTypeNormal, esv1.ReasonUpdated, eventUpdated)
 	return nil
 }

+ 36 - 0
pkg/controllers/externalsecret/util.go

@@ -17,8 +17,10 @@ limitations under the License.
 package externalsecret
 
 import (
+	"bytes"
 	"crypto/sha3"
 	"fmt"
+	"sort"
 
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -84,3 +86,37 @@ func fqdnFor(name string) string {
 	}
 	return fqdn
 }
+
+// diffSecretDataKeys compares a secret's data keys before and after a mutation
+// and returns sorted lists of the key names that were added, updated, removed,
+// or newly set to an empty value. "emptied" is a subset of added/updated whose
+// new value has zero length, which is the "value replaced with empty string"
+// case. It returns only key names, never the secret values, so the result is
+// safe to log.
+func diffSecretDataKeys(oldData, newData map[string][]byte) (added, updated, removed, emptied []string) {
+	for key, newVal := range newData {
+		oldVal, existed := oldData[key]
+		switch {
+		case !existed:
+			added = append(added, key)
+		case !bytes.Equal(oldVal, newVal):
+			updated = append(updated, key)
+		default:
+			// unchanged key, nothing to report
+			continue
+		}
+		if len(newVal) == 0 {
+			emptied = append(emptied, key)
+		}
+	}
+	for key := range oldData {
+		if _, ok := newData[key]; !ok {
+			removed = append(removed, key)
+		}
+	}
+	sort.Strings(added)
+	sort.Strings(updated)
+	sort.Strings(removed)
+	sort.Strings(emptied)
+	return added, updated, removed, emptied
+}

+ 89 - 0
pkg/controllers/externalsecret/util_test.go

@@ -180,3 +180,92 @@ func TestSetExternalSecretCondition(t *testing.T) {
 		})
 	}
 }
+
+func TestDiffSecretDataKeys(t *testing.T) {
+	tests := []struct {
+		name        string
+		oldData     map[string][]byte
+		newData     map[string][]byte
+		wantAdded   []string
+		wantUpdated []string
+		wantRemoved []string
+		wantEmptied []string
+	}{
+		{
+			name:    "no change",
+			oldData: map[string][]byte{"a": []byte("1"), "b": []byte("2")},
+			newData: map[string][]byte{"a": []byte("1"), "b": []byte("2")},
+		},
+		{
+			name:      "key added",
+			oldData:   map[string][]byte{"a": []byte("1")},
+			newData:   map[string][]byte{"a": []byte("1"), "b": []byte("2")},
+			wantAdded: []string{"b"},
+		},
+		{
+			name:        "key removed",
+			oldData:     map[string][]byte{"a": []byte("1"), "b": []byte("2")},
+			newData:     map[string][]byte{"a": []byte("1")},
+			wantRemoved: []string{"b"},
+		},
+		{
+			name:        "value changed",
+			oldData:     map[string][]byte{"a": []byte("1")},
+			newData:     map[string][]byte{"a": []byte("2")},
+			wantUpdated: []string{"a"},
+		},
+		{
+			name:        "existing value emptied",
+			oldData:     map[string][]byte{"a": []byte("1")},
+			newData:     map[string][]byte{"a": []byte("")},
+			wantUpdated: []string{"a"},
+			wantEmptied: []string{"a"},
+		},
+		{
+			name:      "new key added empty",
+			oldData:   map[string][]byte{"a": []byte("1")},
+			newData:   map[string][]byte{"a": []byte("1"), "b": []byte("")},
+			wantAdded: []string{"b"},
+			// b is both added and emptied; emptied flags the empty value.
+			wantEmptied: []string{"b"},
+		},
+		{
+			name:      "results are sorted",
+			oldData:   map[string][]byte{},
+			newData:   map[string][]byte{"c": []byte("1"), "a": []byte("2"), "b": []byte("3")},
+			wantAdded: []string{"a", "b", "c"},
+		},
+		{
+			name:        "mixed add update remove empty",
+			oldData:     map[string][]byte{"keep": []byte("x"), "change": []byte("old"), "gone": []byte("y")},
+			newData:     map[string][]byte{"keep": []byte("x"), "change": []byte("new"), "fresh": []byte(""), "added": []byte("z")},
+			wantAdded:   []string{"added", "fresh"},
+			wantUpdated: []string{"change"},
+			wantRemoved: []string{"gone"},
+			wantEmptied: []string{"fresh"},
+		},
+		{
+			name:    "nil maps are safe",
+			oldData: nil,
+			newData: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			added, updated, removed, emptied := diffSecretDataKeys(tt.oldData, tt.newData)
+			if diff := cmp.Diff(tt.wantAdded, added); diff != "" {
+				t.Errorf("added (-want, +got)\n%s", diff)
+			}
+			if diff := cmp.Diff(tt.wantUpdated, updated); diff != "" {
+				t.Errorf("updated (-want, +got)\n%s", diff)
+			}
+			if diff := cmp.Diff(tt.wantRemoved, removed); diff != "" {
+				t.Errorf("removed (-want, +got)\n%s", diff)
+			}
+			if diff := cmp.Diff(tt.wantEmptied, emptied); diff != "" {
+				t.Errorf("emptied (-want, +got)\n%s", diff)
+			}
+		})
+	}
+}