Browse Source

feat: move experimental-enable-vault-token-cache out of experimental and add expiry to validation (#5397)

Co-authored-by: Gergely Brautigam <182850+Skarlso@users.noreply.github.com>
Co-authored-by: Gergely Bräutigam <skarlso777@gmail.com>
Co-authored-by: Gergely Bräutigam <gergely.brautigam@sap.com>
Tony Gosselin 1 month ago
parent
commit
e325bced50

+ 3 - 0
deploy/charts/external-secrets/README.md

@@ -226,6 +226,9 @@ The command removes all the Kubernetes components associated with the chart and
 | systemAuthDelegator | bool | `false` | If true the system:auth-delegator ClusterRole will be added to RBAC |
 | tolerations | list | `[]` |  |
 | topologySpreadConstraints | list | `[]` |  |
+| vault | object | `{"enableTokenCache":false,"tokenCacheSize":262144}` | Vault token cache configuration |
+| vault.enableTokenCache | bool | `false` | Enable Vault token cache. External secrets will reuse the Vault token without creating a new one on each request. |
+| vault.tokenCacheSize | int | `262144` | Maximum size of Vault token cache. Only used if enableTokenCache is true. |
 | webhook.affinity | object | `{}` |  |
 | webhook.annotations | object | `{}` | Annotations to place on validating webhook configuration. |
 | webhook.certCheckInterval | string | `"5m"` | Specifies the time to check if the cert is valid |

+ 6 - 0
deploy/charts/external-secrets/templates/deployment.yaml

@@ -107,6 +107,12 @@ spec:
           {{- if .Values.enableHTTP2 }}
           - --enable-http2=true
           {{- end }}
+          {{- if .Values.vault.enableTokenCache }}
+          - --enable-vault-token-cache=true
+          {{- end }}
+          {{- if and .Values.vault.enableTokenCache .Values.vault.tokenCacheSize }}
+          - --vault-token-cache-size={{ .Values.vault.tokenCacheSize }}
+          {{- end }}
           {{- if .Values.concurrent }}
           - --concurrent={{ .Values.concurrent }}
           {{- end }}

+ 11 - 0
deploy/charts/external-secrets/values.schema.json

@@ -835,6 +835,17 @@
         "topologySpreadConstraints": {
             "type": "array"
         },
+        "vault": {
+            "type": "object",
+            "properties": {
+                "enableTokenCache": {
+                    "type": "boolean"
+                },
+                "tokenCacheSize": {
+                    "type": "integer"
+                }
+            }
+        },
         "webhook": {
             "type": "object",
             "properties": {

+ 7 - 0
deploy/charts/external-secrets/values.yaml

@@ -154,6 +154,13 @@ createOperator: true
 # -- if true, HTTP2 will be enabled for the services created by all controllers, curently metrics and webhook.
 enableHTTP2: false
 
+# -- Vault token cache configuration
+vault:
+  # -- Enable Vault token cache. External secrets will reuse the Vault token without creating a new one on each request.
+  enableTokenCache: false
+  # -- Maximum size of Vault token cache. Only used if enableTokenCache is true.
+  tokenCacheSize: 262144
+
 # -- Specifies the number of concurrent ExternalSecret Reconciles external-secret executes at
 # a time.
 concurrent: 1

+ 4 - 4
design/014-feature-flag-consolidation.md

@@ -184,10 +184,10 @@ Create `docs/flags.md` (or equivalent in documentation site) listing all flags:
 ## Provider Flags
 
 ### Vault
-| Feature name | Flag                                       | Default  | Description          |
-|--------------|--------------------------------------------|----------|----------------------|
-| Token Cache  | `--experimental-enable-vault-token-cache`  | `false`  | Enable token caching |
-| Token Cache  | `--experimental-vault-token-cache-size`    | `1000`   | Cache size           |
+| Feature name | Flag                          | Default  | Description          |
+|--------------|-------------------------------|----------|----------------------|
+| Token Cache  | `--enable-vault-token-cache`  | `false`  | Enable token caching |
+| Token Cache  | `--vault-token-cache-size`    | `262144` | Cache size           |
 
 ### Doppler
 | Feature name | Flag                        | Default  | Description           |

+ 2 - 0
docs/api/controller-options.md

@@ -29,6 +29,8 @@ reconciler.
 | `--enable-flood-gate`                         | boolean  | true    | Enable flood gate. External secret will be reconciled only if the ClusterStore or Store have an healthy or unknown state.                                          |
 | `--enable-extended-metric-labels`             | boolean  | true    | Enable recommended kubernetes annotations as labels in metrics.                                                                                                    |
 | `--enable-leader-election`                    | boolean  | false   | Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.                                              |
+| `--enable-vault-token-cache`                   | boolean  | false   | Enable Vault token cache. External secrets will reuse the Vault token without creating a new one on each request.                                                |
+| `--vault-token-cache-size`                     | int      | 0       | Maximum size of Vault token cache. Only used if --enable-vault-token-cache is set.                                                                                |
 | `--experimental-enable-aws-session-cache`     | boolean  | false   | DEPRECATED: this flag is no longer used and will be removed since aws sdk v2 has its own session cache.                                                            |
 | `--help`                                      |          |         | help for external-secrets                                                                                                                                          |
 | `--loglevel`                                  | string   | info    | loglevel to use, one of: debug, info, warn, error, dpanic, panic, fatal                                                                                            |

+ 38 - 0
docs/provider/hashicorp-vault.md

@@ -538,6 +538,44 @@ spec:
         # ...
 ```
 
+### Token Cache Configuration
+
+The Vault provider supports token caching to improve performance by reusing Vault tokens across multiple requests instead of creating new ones each time. This is particularly useful when using authentication methods that generate short-lived tokens.
+
+#### Configuration Flags
+
+The following command-line flags control the Vault token cache behavior:
+
+- `--enable-vault-token-cache`: Enable Vault token cache (default: `false`)
+- `--vault-token-cache-size`: Maximum size of the Vault token cache (default: `262144`)
+
+#### Usage
+
+To enable token caching, set the `--enable-vault-token-cache` flag to `true`:
+
+```bash
+external-secrets --enable-vault-token-cache --vault-token-cache-size=262144
+```
+
+#### Cache Behavior
+
+- **Cache Key**: The cache uses a combination of the SecretStore name, namespace, and kind as the cache key
+- **Token Validation**: Before using a cached token, the provider validates its TTL to ensure it hasn't expired
+- **Cache Eviction**: When the cache reaches its maximum size, the least recently used tokens are evicted
+- **Token Revocation**: When tokens are evicted from the cache, they are properly revoked from Vault
+
+#### When to Use Token Caching
+
+Token caching is beneficial when:
+- Using authentication methods that generate short-lived tokens (e.g., AppRole, Kubernetes auth)
+- Running multiple ExternalSecrets that use the same SecretStore
+- Experiencing high token generation overhead
+
+Token caching should **not** be used when:
+- Using static tokens (no performance benefit)
+- Security requirements mandate fresh tokens for each request
+- Memory usage is a concern
+
 #### Read Your Writes
 
 Vault 1.10.0 and later encodes information in the token to detect the case

+ 1 - 1
e2e/framework/addon/eso.go

@@ -82,7 +82,7 @@ func NewESO(mutators ...MutationFunc) *ESO {
 					Value: "100",
 				},
 				{
-					Key:   "extraArgs.experimental-enable-vault-token-cache",
+					Key:   "extraArgs.enable-vault-token-cache",
 					Value: "true",
 				},
 			},

+ 39 - 15
providers/v1/vault/auth.go

@@ -21,6 +21,7 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"time"
 
 	vault "github.com/hashicorp/vault/api"
 	authv1 "k8s.io/api/authentication/v1"
@@ -58,13 +59,21 @@ func (c *client) setAuth(ctx context.Context, cfg *vault.Config) error {
 	defer restoreNamespace()
 
 	tokenExists := false
-	var err error
+	var (
+		err    error
+		expiry *time.Time
+	)
 	if c.client.Token() != "" {
-		tokenExists, err = checkToken(ctx, c.token)
+		tokenExists, expiry, err = checkToken(ctx, c.token)
 	}
+	// update the token before returning so it's always the latest value even if the token does not exist.
+	c.tokenExpiryTime = expiry
 	if tokenExists {
-		c.log.V(1).Info("Re-using existing token")
-		return err
+		// if token expiry exists only re-use it IF the token expiry didn't expire
+		if expiry == nil || expiry.After(time.Now()) {
+			c.log.V(1).Info("Re-using existing token")
+			return err
+		}
 	}
 
 	tokenExists, err = setSecretKeyToken(ctx, c)
@@ -157,49 +166,64 @@ func createServiceAccountToken(
 }
 
 // checkToken does a lookup and checks if the provided token exists.
-func checkToken(ctx context.Context, token vaultutil.Token) (bool, error) {
+func checkToken(ctx context.Context, token vaultutil.Token) (bool, *time.Time, error) {
 	// https://www.vaultproject.io/api-docs/auth/token#lookup-a-token-self
 	resp, err := token.LookupSelfWithContext(ctx)
 	metrics.ObserveAPICall(constants.ProviderHCVault, constants.CallHCVaultLookupSelf, err)
 	if err != nil {
-		return false, err
+		return false, nil, err
 	}
 	// LookupSelfWithContext() calls ParseSecret(), which has several places
 	// that return no data and no error, including when a token is expired.
 	if resp == nil {
-		return false, errors.New("no response nor error for token lookup")
+		return false, nil, errors.New("no response nor error for token lookup")
 	}
 	t, ok := resp.Data["type"]
 	if !ok {
-		return false, errors.New("could not assert token type")
+		return false, nil, errors.New("could not assert token type")
 	}
 	tokenType := t.(string)
 	if tokenType == "batch" {
-		return false, nil
+		return false, nil, nil
 	}
 	ttl, ok := resp.Data["ttl"]
 	if !ok {
-		return false, errors.New("no TTL found in response")
+		return false, nil, errors.New("no TTL found in response")
 	}
 	ttlInt, err := ttl.(json.Number).Int64()
 	if err != nil {
-		return false, fmt.Errorf("invalid token TTL: %v: %w", ttl, err)
+		return false, nil, fmt.Errorf("invalid token TTL: %v: %w", ttl, err)
 	}
 	expireTime, ok := resp.Data["expire_time"]
 	if !ok {
-		return false, errors.New("no expiration time found in response")
+		return false, nil, errors.New("no expiration time found in response")
 	}
 	if ttlInt < 60 && expireTime != nil {
 		// Treat expirable tokens that are about to expire as already expired.
 		// This ensures that the token won't expire in between this check and
 		// performing the actual operation.
-		return false, nil
+		return false, nil, nil
+	}
+
+	if expireTime == nil {
+		return true, nil, nil
 	}
-	return true, nil
+
+	et, ok := expireTime.(string)
+	if !ok {
+		return false, nil, fmt.Errorf("expire time is not a string but is: %T", expireTime)
+	}
+
+	parsedExpiry, err := time.Parse(time.RFC3339, et)
+	if err != nil {
+		return false, nil, fmt.Errorf("invalid token expiration time: %v: %w", et, err)
+	}
+
+	return true, &parsedExpiry, nil
 }
 
 func revokeTokenIfValid(ctx context.Context, client vaultutil.Client) error {
-	valid, err := checkToken(ctx, client.AuthToken())
+	valid, _, err := checkToken(ctx, client.AuthToken())
 	if err != nil {
 		return fmt.Errorf(errVaultRevokeToken, err)
 	}

+ 2 - 2
providers/v1/vault/auth_test.go

@@ -203,7 +203,7 @@ func TestCheckTokenErrors(t *testing.T) {
 				},
 			}
 
-			cached, _ := checkToken(context.Background(), token)
+			cached, _, _ := checkToken(context.Background(), token)
 			if cached {
 				t.Errorf("%v", tc.message)
 			}
@@ -271,7 +271,7 @@ func TestCheckTokenTtl(t *testing.T) {
 				},
 			}
 
-			cached, err := checkToken(context.Background(), token)
+			cached, _, err := checkToken(context.Background(), token)
 			if cached != tc.cache || err != nil {
 				t.Errorf("%v: err = %v", tc.message, err)
 			}

+ 12 - 10
providers/v1/vault/client.go

@@ -23,6 +23,7 @@ import (
 	"errors"
 	"fmt"
 	"net/http"
+	"time"
 
 	"github.com/go-logr/logr"
 	vault "github.com/hashicorp/vault/api"
@@ -39,16 +40,17 @@ import (
 var _ esv1.SecretsClient = &client{}
 
 type client struct {
-	kube      kclient.Client
-	store     *esv1.VaultProvider
-	log       logr.Logger
-	corev1    typedcorev1.CoreV1Interface
-	client    vaultutil.Client
-	auth      vaultutil.Auth
-	logical   vaultutil.Logical
-	token     vaultutil.Token
-	namespace string
-	storeKind string
+	kube            kclient.Client
+	store           *esv1.VaultProvider
+	log             logr.Logger
+	corev1          typedcorev1.CoreV1Interface
+	client          vaultutil.Client
+	auth            vaultutil.Auth
+	logical         vaultutil.Logical
+	token           vaultutil.Token
+	tokenExpiryTime *time.Time
+	namespace       string
+	storeKind       string
 }
 
 func (c *client) newConfig(ctx context.Context) (*vault.Config, error) {

+ 36 - 6
providers/v1/vault/provider.go

@@ -182,6 +182,7 @@ func (p *Provider) initClient(ctx context.Context, c *client, client vaultutil.C
 	if c.storeKind == esv1.ClusterSecretStoreKind && c.namespace == "" && isReferentSpec(vaultSpec) {
 		return c, nil
 	}
+	// set auth also sets the token expiry value
 	if err := c.setAuth(ctx, cfg); err != nil {
 		return nil, err
 	}
@@ -323,24 +324,53 @@ func initCache(size int) {
 }
 
 func init() {
-	var vaultTokenCacheSize int
+	var (
+		vaultTokenCacheSize     int
+		experimentalEnableCache bool
+		experimentalCacheSize   int
+	)
+
 	fs := pflag.NewFlagSet("vault", pflag.ExitOnError)
 	fs.BoolVar(
 		&enableCache,
-		"experimental-enable-vault-token-cache",
+		"enable-vault-token-cache",
 		false,
-		"Enable experimental Vault token cache. External secrets will reuse the Vault token without creating a new one on each request.",
+		"Enable Vault token cache. External secrets will reuse the Vault token without creating a new one on each request.",
 	)
 	// max. 265k vault leases with 30bytes each ~= 7MB
 	fs.IntVar(
 		&vaultTokenCacheSize,
+		"vault-token-cache-size",
+		defaultCacheSize,
+		"Maximum size of Vault token cache. Only used if --enable-vault-token-cache is set.",
+	)
+	fs.BoolVar(
+		&experimentalEnableCache,
+		"experimental-enable-vault-token-cache",
+		false,
+		"Enable Vault token cache. External secrets will reuse the Vault token without creating a new one on each request.",
+	)
+	// max. 265k vault leases with 30bytes each ~= 7MB
+	fs.IntVar(
+		&experimentalCacheSize,
 		"experimental-vault-token-cache-size",
 		defaultCacheSize,
-		"Maximum size of Vault token cache. When more tokens than Only used if --experimental-enable-vault-token-cache is set.",
+		"Maximum size of Vault token cache. Only used if --experimental-enable-vault-token-cache is set.",
 	)
 	feature.Register(feature.Feature{
-		Flags:      fs,
-		Initialize: func() { initCache(vaultTokenCacheSize) },
+		Flags: fs,
+		Initialize: func() {
+			// Check for deprecated experimental flags and warn users
+			if experimentalEnableCache {
+				logger.Info("DEPRECATION WARNING: --experimental-enable-vault-token-cache is deprecated. Please use --enable-vault-token-cache instead. This flag will be removed in a future release.")
+				enableCache = true
+			}
+			if experimentalCacheSize > 0 {
+				logger.Info("DEPRECATION WARNING: --experimental-vault-token-cache-size is deprecated. Please use --vault-token-cache-size instead. This flag will be removed in a future release.")
+				vaultTokenCacheSize = experimentalCacheSize
+			}
+			initCache(vaultTokenCacheSize)
+		},
 	})
 }
 

+ 59 - 0
providers/v1/vault/provider_test.go

@@ -883,3 +883,62 @@ func resetCache() {
 	enableCache = false
 	clientCache = nil
 }
+
+func TestValidateTokenExpiry(t *testing.T) {
+	t.Run("skip checkToken when token expiry is in the future", func(t *testing.T) {
+		futureExpiry := time.Now().Add(1 * time.Hour)
+		c := &client{
+			store:           makeValidSecretStore().Spec.Provider.Vault,
+			storeKind:       esv1.SecretStoreKind,
+			tokenExpiryTime: &futureExpiry,
+		}
+		result, err := c.Validate()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if result != esv1.ValidationResultReady {
+			t.Fatalf("expected ValidationResultReady, got %v", result)
+		}
+	})
+
+	t.Run("call checkToken when token expiry is in the past", func(t *testing.T) {
+		pastExpiry := time.Now().Add(-1 * time.Hour)
+		c := &client{
+			store:           makeValidSecretStore().Spec.Provider.Vault,
+			storeKind:       esv1.SecretStoreKind,
+			tokenExpiryTime: &pastExpiry,
+			token: fake.Token{
+				LookupSelfWithContextFn: func(ctx context.Context) (*vault.Secret, error) {
+					return nil, errors.New("token expired")
+				},
+			},
+		}
+		result, err := c.Validate()
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+		if result != esv1.ValidationResultError {
+			t.Fatalf("expected ValidationResultError, got %v", result)
+		}
+	})
+
+	t.Run("call checkToken when token expiry is nil", func(t *testing.T) {
+		c := &client{
+			store:           makeValidSecretStore().Spec.Provider.Vault,
+			storeKind:       esv1.SecretStoreKind,
+			tokenExpiryTime: nil,
+			token: fake.Token{
+				LookupSelfWithContextFn: func(ctx context.Context) (*vault.Secret, error) {
+					return nil, errors.New("lookup failed")
+				},
+			},
+		}
+		result, err := c.Validate()
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+		if result != esv1.ValidationResultError {
+			t.Fatalf("expected ValidationResultError, got %v", result)
+		}
+	})
+}

+ 5 - 1
providers/v1/vault/validate.go

@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"time"
 
 	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
 
@@ -184,7 +185,10 @@ func (c *client) Validate() (esv1.ValidationResult, error) {
 	if c.storeKind == esv1.ClusterSecretStoreKind && isReferentSpec(c.store) {
 		return esv1.ValidationResultUnknown, nil
 	}
-	_, err := checkToken(context.Background(), c.token)
+	if c.tokenExpiryTime != nil && c.tokenExpiryTime.After(time.Now()) {
+		return esv1.ValidationResultReady, nil
+	}
+	_, _, err := checkToken(context.Background(), c.token)
 	if err != nil {
 		return esv1.ValidationResultError, fmt.Errorf(errInvalidCredentials, err)
 	}