metrics.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /*
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. */
  12. package clientmanager
import (
	"errors"
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"sigs.k8s.io/controller-runtime/pkg/metrics"
)
  18. var (
  19. // ClientManager Gauges
  20. clientsCachedTotal = prometheus.NewGaugeVec(
  21. prometheus.GaugeOpts{
  22. Name: "clientmanager_clients_cached_total",
  23. Help: "Total number of cached provider clients",
  24. },
  25. []string{"provider_type"},
  26. )
  27. // ClientManager Counters
  28. cacheHitsTotal = prometheus.NewCounterVec(
  29. prometheus.CounterOpts{
  30. Name: "clientmanager_cache_hits_total",
  31. Help: "Total number of client cache hits",
  32. },
  33. []string{"provider_type"},
  34. )
  35. cacheInvalidationsTotal = prometheus.NewCounterVec(
  36. prometheus.CounterOpts{
  37. Name: "clientmanager_cache_invalidations_total",
  38. Help: "Total number of client cache invalidations",
  39. },
  40. []string{"provider_type", "reason"},
  41. )
  42. )
  43. // ClientManagerMetrics interface for testability
  44. type ClientManagerMetrics interface {
  45. RecordCacheHit(providerType string)
  46. RecordCacheMiss(providerType string)
  47. RecordCacheInvalidation(providerType string, reason string)
  48. UpdateCachedClients(providerType string, count int)
  49. }
  50. // defaultClientManagerMetrics implements ClientManagerMetrics using Prometheus
  51. type defaultClientManagerMetrics struct{}
  52. // RecordCacheHit records a cache hit
  53. func (m *defaultClientManagerMetrics) RecordCacheHit(providerType string) {
  54. cacheHitsTotal.WithLabelValues(providerType).Inc()
  55. }
  56. // RecordCacheMiss records a cache miss
  57. func (m *defaultClientManagerMetrics) RecordCacheMiss(providerType string) {
  58. // Cache misses are implicit - we don't track them separately
  59. // The absence of a hit implies a miss
  60. }
  61. // RecordCacheInvalidation records a cache invalidation
  62. func (m *defaultClientManagerMetrics) RecordCacheInvalidation(providerType string, reason string) {
  63. cacheInvalidationsTotal.WithLabelValues(providerType, reason).Inc()
  64. }
  65. // UpdateCachedClients updates the total cached clients gauge
  66. func (m *defaultClientManagerMetrics) UpdateCachedClients(providerType string, count int) {
  67. clientsCachedTotal.WithLabelValues(providerType).Set(float64(count))
  68. }
  69. // Global instance
  70. var clientManagerMetrics ClientManagerMetrics = &defaultClientManagerMetrics{}
  71. // RegisterMetrics registers all client manager metrics with the controller-runtime metrics registry
  72. func RegisterMetrics() error {
  73. collectors := []prometheus.Collector{
  74. clientsCachedTotal,
  75. cacheHitsTotal,
  76. cacheInvalidationsTotal,
  77. }
  78. for _, collector := range collectors {
  79. if err := metrics.Registry.Register(collector); err != nil {
  80. // Check if already registered
  81. if _, ok := err.(prometheus.AlreadyRegisteredError); ok {
  82. continue
  83. }
  84. return fmt.Errorf("failed to register clientmanager metric: %w", err)
  85. }
  86. }
  87. // Initialize metrics with zero values so they appear in /metrics output
  88. // This ensures metrics are visible even before any cache operations occur
  89. for _, providerType := range []string{"provider", "cluster-provider"} {
  90. clientsCachedTotal.WithLabelValues(providerType).Set(0)
  91. cacheHitsTotal.WithLabelValues(providerType).Add(0)
  92. cacheInvalidationsTotal.WithLabelValues(providerType, "generation_change").Add(0)
  93. cacheInvalidationsTotal.WithLabelValues(providerType, "store_mismatch").Add(0)
  94. }
  95. return nil
  96. }
  97. // GetClientManagerMetrics returns the client manager metrics instance (for testing)
  98. func GetClientManagerMetrics() ClientManagerMetrics {
  99. return clientManagerMetrics
  100. }