Mirror of https://github.com/stakater/Reloader.git (synced 2026-02-14 09:59:50 +00:00)

feat: Load tests
@@ -103,6 +103,8 @@ func NewController(
// Add function to add a new object to the queue in case of creating a resource
func (c *Controller) Add(obj interface{}) {
    // Record event received
    c.collectors.RecordEventReceived("add", c.resource)

    switch object := obj.(type) {
    case *v1.Namespace:
@@ -112,11 +114,14 @@ func (c *Controller) Add(obj interface{}) {

    if options.ReloadOnCreate == "true" {
        if !c.resourceInIgnoredNamespace(obj) && c.resourceInSelectedNamespaces(obj) && secretControllerInitialized && configmapControllerInitialized {
            c.queue.Add(handler.ResourceCreatedHandler{
                Resource: obj,
                Collectors: c.collectors,
                Recorder: c.recorder,
            c.enqueue(handler.ResourceCreatedHandler{
                Resource: obj,
                Collectors: c.collectors,
                Recorder: c.recorder,
                EnqueueTime: time.Now(), // Track when item was enqueued
            })
        } else {
            c.collectors.RecordSkipped("ignored_or_not_selected")
        }
    }
}
@@ -166,31 +171,42 @@ func (c *Controller) removeSelectedNamespaceFromCache(namespace v1.Namespace) {

// Update function to add an old object and a new object to the queue in case of updating a resource
func (c *Controller) Update(old interface{}, new interface{}) {
    // Record event received
    c.collectors.RecordEventReceived("update", c.resource)

    switch new.(type) {
    case *v1.Namespace:
        return
    }

    if !c.resourceInIgnoredNamespace(new) && c.resourceInSelectedNamespaces(new) {
        c.queue.Add(handler.ResourceUpdatedHandler{
        c.enqueue(handler.ResourceUpdatedHandler{
            Resource: new,
            OldResource: old,
            Collectors: c.collectors,
            Recorder: c.recorder,
            EnqueueTime: time.Now(), // Track when item was enqueued
        })
    } else {
        c.collectors.RecordSkipped("ignored_or_not_selected")
    }
}

// Delete function to add an object to the queue in case of deleting a resource
func (c *Controller) Delete(old interface{}) {
    // Record event received
    c.collectors.RecordEventReceived("delete", c.resource)

    if options.ReloadOnDelete == "true" {
        if !c.resourceInIgnoredNamespace(old) && c.resourceInSelectedNamespaces(old) && secretControllerInitialized && configmapControllerInitialized {
            c.queue.Add(handler.ResourceDeleteHandler{
                Resource: old,
                Collectors: c.collectors,
                Recorder: c.recorder,
            c.enqueue(handler.ResourceDeleteHandler{
                Resource: old,
                Collectors: c.collectors,
                Recorder: c.recorder,
                EnqueueTime: time.Now(), // Track when item was enqueued
            })
        } else {
            c.collectors.RecordSkipped("ignored_or_not_selected")
        }
    }

@@ -201,6 +217,13 @@ func (c *Controller) Delete(old interface{}) {
    }
}

// enqueue adds an item to the queue and records metrics
func (c *Controller) enqueue(item interface{}) {
    c.queue.Add(item)
    c.collectors.RecordQueueAdd()
    c.collectors.SetQueueDepth(c.queue.Len())
}

// Run function for controller which handles the queue
func (c *Controller) Run(threadiness int, stopCh chan struct{}) {
    defer runtime.HandleCrash()
@@ -242,13 +265,36 @@ func (c *Controller) processNextItem() bool {
    if quit {
        return false
    }

    // Update queue depth after getting item
    c.collectors.SetQueueDepth(c.queue.Len())

    // Tell the queue that we are done with processing this key. This unblocks the key for other workers
    // This allows safe parallel processing because two events with the same key are never processed in
    // parallel.
    defer c.queue.Done(resourceHandler)

    // Record queue latency if the handler supports it
    if h, ok := resourceHandler.(handler.TimedHandler); ok {
        queueLatency := time.Since(h.GetEnqueueTime())
        c.collectors.RecordQueueLatency(queueLatency)
    }

    // Track reconcile/handler duration
    startTime := time.Now()

    // Invoke the method containing the business logic
    err := resourceHandler.(handler.ResourceHandler).Handle()

    duration := time.Since(startTime)

    // Record reconcile metrics
    if err != nil {
        c.collectors.RecordReconcile("error", duration)
    } else {
        c.collectors.RecordReconcile("success", duration)
    }

    // Handle the error if something went wrong during the execution of the business logic
    c.handleErr(err, resourceHandler)
    return true
@@ -261,16 +307,26 @@ func (c *Controller) handleErr(err error, key interface{}) {
        // This ensures that future processing of updates for this key is not delayed because of
        // an outdated error history.
        c.queue.Forget(key)

        // Record successful event processing
        c.collectors.RecordEventProcessed("unknown", c.resource, "success")
        return
    }

    // Record error
    c.collectors.RecordError("handler_error")

    // This controller retries 5 times if something goes wrong. After that, it stops trying.
    if c.queue.NumRequeues(key) < 5 {
        logrus.Errorf("Error syncing events: %v", err)

        // Record retry
        c.collectors.RecordRetry()

        // Re-enqueue the key rate limited. Based on the rate limiter on the
        // queue and the re-enqueue history, the key will be processed later again.
        c.queue.AddRateLimited(key)
        c.collectors.SetQueueDepth(c.queue.Len())
        return
    }

@@ -279,4 +335,7 @@ func (c *Controller) handleErr(err error, key interface{}) {
    runtime.HandleError(err)
    logrus.Errorf("Dropping key out of the queue: %v", err)
    logrus.Debugf("Dropping the key %q out of the queue: %v", key, err)

    // Record failed event processing
    c.collectors.RecordEventProcessed("unknown", c.resource, "dropped")
}
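The queue instrumentation added above is what a load test can assert against. Below is a minimal sketch, not part of this commit, of how the new counters could be checked with the prometheus/client_golang testutil package; the Controller field types, the queue constructor, and the use of enqueue directly (bypassing the ReloadOnCreate gating) are assumptions made for illustration.

package controller

import (
    "testing"
    "time"

    "github.com/prometheus/client_golang/prometheus/testutil"
    "k8s.io/client-go/util/workqueue"

    "github.com/stakater/Reloader/internal/pkg/handler"
    "github.com/stakater/Reloader/internal/pkg/metrics"
)

// Sketch: drive enqueue() in a tight loop and read the QueueAdds counter back.
// NewCollectors() is used without global registration so the test stays isolated.
func TestEnqueueRecordsQueueMetrics(t *testing.T) {
    collectors := metrics.NewCollectors()
    c := &Controller{
        queue:      workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
        collectors: collectors,
    }

    for i := 0; i < 100; i++ {
        // Each handler carries a distinct EnqueueTime, so the workqueue does not
        // collapse the items; QueueAdds is incremented on every call regardless.
        c.enqueue(handler.ResourceCreatedHandler{EnqueueTime: time.Now()})
    }

    if got := testutil.ToFloat64(collectors.QueueAdds); got != 100 {
        t.Fatalf("expected 100 queue adds, got %v", got)
    }
}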
@@ -2157,19 +2157,21 @@ func TestController_resourceInIgnoredNamespace(t *testing.T) {
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            c := &Controller{
                client: tt.fields.client,
                indexer: tt.fields.indexer,
                queue: tt.fields.queue,
                informer: tt.fields.informer,
                namespace: tt.fields.namespace,
                ignoredNamespaces: tt.fields.ignoredNamespaces,
            }
            if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
                t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
            }
        })
        t.Run(
            tt.name, func(t *testing.T) {
                c := &Controller{
                    client: tt.fields.client,
                    indexer: tt.fields.indexer,
                    queue: tt.fields.queue,
                    informer: tt.fields.informer,
                    namespace: tt.fields.namespace,
                    ignoredNamespaces: tt.fields.ignoredNamespaces,
                }
                if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
                    t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
                }
            },
        )
    }
}

@@ -2331,35 +2333,37 @@ func TestController_resourceInNamespaceSelector(t *testing.T) {
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            fakeClient := fake.NewSimpleClientset()
            namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
            logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)
        t.Run(
            tt.name, func(t *testing.T) {
                fakeClient := fake.NewSimpleClientset()
                namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
                logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)

            c := &Controller{
                client: fakeClient,
                indexer: tt.fields.indexer,
                queue: tt.fields.queue,
                informer: tt.fields.informer,
                namespace: tt.fields.namespace.Name,
                namespaceSelector: tt.fields.namespaceSelector,
            }
                c := &Controller{
                    client: fakeClient,
                    indexer: tt.fields.indexer,
                    queue: tt.fields.queue,
                    informer: tt.fields.informer,
                    namespace: tt.fields.namespace.Name,
                    namespaceSelector: tt.fields.namespaceSelector,
                }

            listOptions := metav1.ListOptions{}
            listOptions.LabelSelector = tt.fields.namespaceSelector
            namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)
                listOptions := metav1.ListOptions{}
                listOptions.LabelSelector = tt.fields.namespaceSelector
                namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)

            for _, ns := range namespaces.Items {
                c.addSelectedNamespaceToCache(ns)
            }
                for _, ns := range namespaces.Items {
                    c.addSelectedNamespaceToCache(ns)
                }

            if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
                t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
            }
                if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
                    t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
                }

            for _, ns := range namespaces.Items {
                c.removeSelectedNamespaceFromCache(ns)
            }
        })
                for _, ns := range namespaces.Items {
                    c.removeSelectedNamespaceFromCache(ns)
                }
            },
        )
    }
}
@@ -1,6 +1,8 @@
package handler

import (
    "time"

    "github.com/sirupsen/logrus"
    "github.com/stakater/Reloader/internal/pkg/metrics"
    "github.com/stakater/Reloader/internal/pkg/options"
@@ -11,23 +13,45 @@ import (

// ResourceCreatedHandler contains new objects
type ResourceCreatedHandler struct {
    Resource interface{}
    Collectors metrics.Collectors
    Recorder record.EventRecorder
    Resource interface{}
    Collectors metrics.Collectors
    Recorder record.EventRecorder
    EnqueueTime time.Time // Time when this handler was added to the queue
}

// GetEnqueueTime returns when this handler was enqueued
func (r ResourceCreatedHandler) GetEnqueueTime() time.Time {
    return r.EnqueueTime
}

// Handle processes the newly created resource
func (r ResourceCreatedHandler) Handle() error {
    startTime := time.Now()
    result := "success"

    defer func() {
        r.Collectors.RecordReconcile(result, time.Since(startTime))
    }()

    if r.Resource == nil {
        logrus.Errorf("Resource creation handler received nil resource")
        result = "error"
    } else {
        config, _ := r.GetConfig()
        // Send webhook
        if options.WebhookUrl != "" {
            return sendUpgradeWebhook(config, options.WebhookUrl)
            err := sendUpgradeWebhook(config, options.WebhookUrl)
            if err != nil {
                result = "error"
            }
            return err
        }
        // process resource based on its type
        return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
        err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
        if err != nil {
            result = "error"
        }
        return err
    }
    return nil
}
@@ -3,6 +3,7 @@ package handler
import (
    "fmt"
    "slices"
    "time"

    "github.com/sirupsen/logrus"
    "github.com/stakater/Reloader/internal/pkg/callbacks"
@@ -20,23 +21,45 @@ import (

// ResourceDeleteHandler contains new objects
type ResourceDeleteHandler struct {
    Resource interface{}
    Collectors metrics.Collectors
    Recorder record.EventRecorder
    Resource interface{}
    Collectors metrics.Collectors
    Recorder record.EventRecorder
    EnqueueTime time.Time // Time when this handler was added to the queue
}

// GetEnqueueTime returns when this handler was enqueued
func (r ResourceDeleteHandler) GetEnqueueTime() time.Time {
    return r.EnqueueTime
}

// Handle processes resources being deleted
func (r ResourceDeleteHandler) Handle() error {
    startTime := time.Now()
    result := "success"

    defer func() {
        r.Collectors.RecordReconcile(result, time.Since(startTime))
    }()

    if r.Resource == nil {
        logrus.Errorf("Resource delete handler received nil resource")
        result = "error"
    } else {
        config, _ := r.GetConfig()
        // Send webhook
        if options.WebhookUrl != "" {
            return sendUpgradeWebhook(config, options.WebhookUrl)
            err := sendUpgradeWebhook(config, options.WebhookUrl)
            if err != nil {
                result = "error"
            }
            return err
        }
        // process resource based on its type
        return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
        err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
        if err != nil {
            result = "error"
        }
        return err
    }
    return nil
}
@@ -1,9 +1,18 @@
package handler

import "github.com/stakater/Reloader/pkg/common"
import (
    "time"

    "github.com/stakater/Reloader/pkg/common"
)

// ResourceHandler handles the creation and update of resources
type ResourceHandler interface {
    Handle() error
    GetConfig() (common.Config, string)
}

// TimedHandler is a handler that tracks when it was enqueued
type TimedHandler interface {
    GetEnqueueTime() time.Time
}
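Queue latency is only recorded for items that satisfy TimedHandler in addition to ResourceHandler, since the controller checks for it with a type assertion before calling Handle(). A compile-time check such as the following, a sketch that is not part of this commit, would make that contract explicit for the three built-in handlers:

package handler

// Compile-time assertions (illustrative sketch): each built-in handler must
// satisfy both interfaces so the controller can record how long the item
// waited in the workqueue before Handle() ran.
var (
    _ ResourceHandler = ResourceCreatedHandler{}
    _ TimedHandler    = ResourceCreatedHandler{}

    _ ResourceHandler = ResourceUpdatedHandler{}
    _ TimedHandler    = ResourceUpdatedHandler{}

    _ ResourceHandler = ResourceDeleteHandler{}
    _ TimedHandler    = ResourceDeleteHandler{}
)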
@@ -1,6 +1,8 @@
package handler

import (
    "time"

    "github.com/sirupsen/logrus"
    "github.com/stakater/Reloader/internal/pkg/metrics"
    "github.com/stakater/Reloader/internal/pkg/options"
@@ -16,21 +18,47 @@ type ResourceUpdatedHandler struct {
    OldResource interface{}
    Collectors metrics.Collectors
    Recorder record.EventRecorder
    EnqueueTime time.Time // Time when this handler was added to the queue
}

// GetEnqueueTime returns when this handler was enqueued
func (r ResourceUpdatedHandler) GetEnqueueTime() time.Time {
    return r.EnqueueTime
}

// Handle processes the updated resource
func (r ResourceUpdatedHandler) Handle() error {
    startTime := time.Now()
    result := "success"

    defer func() {
        r.Collectors.RecordReconcile(result, time.Since(startTime))
    }()

    if r.Resource == nil || r.OldResource == nil {
        logrus.Errorf("Resource update handler received nil resource")
        result = "error"
    } else {
        config, oldSHAData := r.GetConfig()
        if config.SHAValue != oldSHAData {
            // Send a webhook if update
            if options.WebhookUrl != "" {
                return sendUpgradeWebhook(config, options.WebhookUrl)
                err := sendUpgradeWebhook(config, options.WebhookUrl)
                if err != nil {
                    result = "error"
                }
                return err
            }
            // process resource based on its type
            return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
            err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
            if err != nil {
                result = "error"
            }
            return err
        } else {
            // No data change - skip
            result = "skipped"
            r.Collectors.RecordSkipped("no_data_change")
        }
    }
    return nil
@@ -7,6 +7,7 @@ import (
    "fmt"
    "io"
    "os"
    "time"

    "github.com/parnurzeal/gorequest"
    "github.com/prometheus/client_golang/prometheus"
@@ -236,23 +237,34 @@ func rollingUpgrade(clients kube.Clients, config common.Config, upgradeFuncs cal
func PerformAction(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy) error {
    items := upgradeFuncs.ItemsFunc(clients, config.Namespace)

    // Record workloads scanned
    collectors.RecordWorkloadsScanned(upgradeFuncs.ResourceType, len(items))

    matchedCount := 0
    for _, item := range items {
        err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) error {
            return upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource)
        err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) (bool, error) {
            matched, err := upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource)
            if matched {
                matchedCount++
            }
            return matched, err
        })
        if err != nil {
            return err
        }
    }

    // Record workloads matched
    collectors.RecordWorkloadsMatched(upgradeFuncs.ResourceType, matchedCount)

    return nil
}

func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
func retryOnConflict(backoff wait.Backoff, fn func(_ bool) (bool, error)) error {
    var lastError error
    fetchResource := false // do not fetch resource on first attempt, already done by ItemsFunc
    err := wait.ExponentialBackoff(backoff, func() (bool, error) {
        err := fn(fetchResource)
        _, err := fn(fetchResource)
        fetchResource = true
        switch {
        case err == nil:
@@ -270,17 +282,19 @@ func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
    return err
}

func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) error {
func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) (bool, error) {
    actionStartTime := time.Now()

    accessor, err := meta.Accessor(resource)
    if err != nil {
        return err
        return false, err
    }

    resourceName := accessor.GetName()
    if fetchResource {
        resource, err = upgradeFuncs.ItemFunc(clients, resourceName, config.Namespace)
        if err != nil {
            return err
            return false, err
        }
    }
    annotations := upgradeFuncs.AnnotationsFunc(resource)
@@ -289,13 +303,14 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca

    if !result.ShouldReload {
        logrus.Debugf("No changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
        return nil
        return false, nil
    }

    strategyResult := strategy(upgradeFuncs, resource, config, result.AutoReload)

    if strategyResult.Result != constants.Updated {
        return nil
        collectors.RecordSkipped("strategy_not_updated")
        return false, nil
    }

    // find correct annotation and update the resource
@@ -309,7 +324,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
            _, err = PauseDeployment(deployment, clients, config.Namespace, pauseInterval)
            if err != nil {
                logrus.Errorf("Failed to pause deployment '%s' in namespace '%s': %v", resourceName, config.Namespace, err)
                return err
                return true, err
            }
        }
    }
@@ -320,16 +335,19 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
        err = upgradeFuncs.UpdateFunc(clients, config.Namespace, resource)
    }

    actionLatency := time.Since(actionStartTime)

    if err != nil {
        message := fmt.Sprintf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)
        logrus.Errorf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)

        collectors.Reloaded.With(prometheus.Labels{"success": "false"}).Inc()
        collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "false", "namespace": config.Namespace}).Inc()
        collectors.RecordAction(upgradeFuncs.ResourceType, "error", actionLatency)
        if recorder != nil {
            recorder.Event(resource, v1.EventTypeWarning, "ReloadFail", message)
        }
        return err
        return true, err
    } else {
        message := fmt.Sprintf("Changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
        message += fmt.Sprintf(", Updated '%s' of type '%s' in namespace '%s'", resourceName, upgradeFuncs.ResourceType, config.Namespace)
@@ -338,6 +356,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca

        collectors.Reloaded.With(prometheus.Labels{"success": "true"}).Inc()
        collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "true", "namespace": config.Namespace}).Inc()
        collectors.RecordAction(upgradeFuncs.ResourceType, "success", actionLatency)
        alert_on_reload, ok := os.LookupEnv("ALERT_ON_RELOAD")
        if recorder != nil {
            recorder.Event(resource, v1.EventTypeNormal, "Reloaded", message)
@@ -350,7 +369,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
        }
    }

    return nil
    return true, nil
}

func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string) string {
@@ -1,54 +1,407 @@
package metrics

import (
    "context"
    "net/http"
    "net/url"
    "os"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
    "k8s.io/client-go/tools/metrics"
)

// clientGoRequestMetrics implements metrics.LatencyMetric and metrics.ResultMetric
// to expose client-go's rest_client_requests_total metric
type clientGoRequestMetrics struct {
    requestCounter *prometheus.CounterVec
    requestLatency *prometheus.HistogramVec
}

func (m *clientGoRequestMetrics) Increment(ctx context.Context, code string, method string, host string) {
    m.requestCounter.WithLabelValues(code, method, host).Inc()
}

func (m *clientGoRequestMetrics) Observe(ctx context.Context, verb string, u url.URL, latency time.Duration) {
    m.requestLatency.WithLabelValues(verb, u.Host).Observe(latency.Seconds())
}

var clientGoMetrics = &clientGoRequestMetrics{
    requestCounter: prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "rest_client_requests_total",
            Help: "Number of HTTP requests, partitioned by status code, method, and host.",
        },
        []string{"code", "method", "host"},
    ),
    requestLatency: prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "rest_client_request_duration_seconds",
            Help: "Request latency in seconds. Broken down by verb and host.",
            Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30},
        },
        []string{"verb", "host"},
    ),
}

func init() {
    // Register the metrics collectors
    prometheus.MustRegister(clientGoMetrics.requestCounter)
    prometheus.MustRegister(clientGoMetrics.requestLatency)

    // Register our metrics implementation with client-go
    metrics.RequestResult = clientGoMetrics
    metrics.RequestLatency = clientGoMetrics
}

// Collectors holds all Prometheus metrics collectors for Reloader.
type Collectors struct {
    // Existing metrics (preserved for backward compatibility)
    Reloaded *prometheus.CounterVec
    ReloadedByNamespace *prometheus.CounterVec
    countByNamespace bool

    // Reconcile/Handler metrics
    ReconcileTotal *prometheus.CounterVec // Total reconcile calls by result
    ReconcileDuration *prometheus.HistogramVec // Time spent in reconcile/handler

    // Action metrics
    ActionTotal *prometheus.CounterVec // Total actions by workload kind and result
    ActionLatency *prometheus.HistogramVec // Time from event to action applied

    // Skip metrics
    SkippedTotal *prometheus.CounterVec // Skipped operations by reason

    // Queue metrics
    QueueDepth prometheus.Gauge // Current queue depth
    QueueAdds prometheus.Counter // Total items added to queue
    QueueLatency *prometheus.HistogramVec // Time spent in queue

    // Error and retry metrics
    ErrorsTotal *prometheus.CounterVec // Errors by type
    RetriesTotal prometheus.Counter // Total retries

    // Event processing metrics
    EventsReceived *prometheus.CounterVec // Events received by type (add/update/delete)
    EventsProcessed *prometheus.CounterVec // Events processed by type and result

    // Resource discovery metrics
    WorkloadsScanned *prometheus.CounterVec // Workloads scanned by kind
    WorkloadsMatched *prometheus.CounterVec // Workloads matched for reload by kind
}

// RecordReload records a reload event with the given success status and namespace.
// Preserved for backward compatibility.
func (c *Collectors) RecordReload(success bool, namespace string) {
    if c == nil {
        return
    }

    successLabel := "false"
    if success {
        successLabel = "true"
    }

    c.Reloaded.With(prometheus.Labels{"success": successLabel}).Inc()

    if c.countByNamespace {
        c.ReloadedByNamespace.With(prometheus.Labels{
            "success": successLabel,
            "namespace": namespace,
        }).Inc()
    }
}

// RecordReconcile records a reconcile/handler invocation.
func (c *Collectors) RecordReconcile(result string, duration time.Duration) {
    if c == nil {
        return
    }
    c.ReconcileTotal.With(prometheus.Labels{"result": result}).Inc()
    c.ReconcileDuration.With(prometheus.Labels{"result": result}).Observe(duration.Seconds())
}

// RecordAction records a reload action on a workload.
func (c *Collectors) RecordAction(workloadKind string, result string, latency time.Duration) {
    if c == nil {
        return
    }
    c.ActionTotal.With(prometheus.Labels{"workload_kind": workloadKind, "result": result}).Inc()
    c.ActionLatency.With(prometheus.Labels{"workload_kind": workloadKind}).Observe(latency.Seconds())
}

// RecordSkipped records a skipped operation with reason.
func (c *Collectors) RecordSkipped(reason string) {
    if c == nil {
        return
    }
    c.SkippedTotal.With(prometheus.Labels{"reason": reason}).Inc()
}

// RecordQueueAdd records an item being added to the queue.
func (c *Collectors) RecordQueueAdd() {
    if c == nil {
        return
    }
    c.QueueAdds.Inc()
}

// SetQueueDepth sets the current queue depth.
func (c *Collectors) SetQueueDepth(depth int) {
    if c == nil {
        return
    }
    c.QueueDepth.Set(float64(depth))
}

// RecordQueueLatency records how long an item spent in the queue.
func (c *Collectors) RecordQueueLatency(latency time.Duration) {
    if c == nil {
        return
    }
    c.QueueLatency.With(prometheus.Labels{}).Observe(latency.Seconds())
}

// RecordError records an error by type.
func (c *Collectors) RecordError(errorType string) {
    if c == nil {
        return
    }
    c.ErrorsTotal.With(prometheus.Labels{"type": errorType}).Inc()
}

// RecordRetry records a retry attempt.
func (c *Collectors) RecordRetry() {
    if c == nil {
        return
    }
    c.RetriesTotal.Inc()
}

// RecordEventReceived records an event being received.
func (c *Collectors) RecordEventReceived(eventType string, resourceType string) {
    if c == nil {
        return
    }
    c.EventsReceived.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType}).Inc()
}

// RecordEventProcessed records an event being processed.
func (c *Collectors) RecordEventProcessed(eventType string, resourceType string, result string) {
    if c == nil {
        return
    }
    c.EventsProcessed.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType, "result": result}).Inc()
}

// RecordWorkloadsScanned records workloads scanned during a reconcile.
func (c *Collectors) RecordWorkloadsScanned(kind string, count int) {
    if c == nil {
        return
    }
    c.WorkloadsScanned.With(prometheus.Labels{"kind": kind}).Add(float64(count))
}

// RecordWorkloadsMatched records workloads matched for reload.
func (c *Collectors) RecordWorkloadsMatched(kind string, count int) {
    if c == nil {
        return
    }
    c.WorkloadsMatched.With(prometheus.Labels{"kind": kind}).Add(float64(count))
}
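Each recorder method above is nil-receiver safe, so instrumentation calls can sit on code paths that may run before the collectors are wired up. A minimal usage sketch, not part of this commit and with illustrative label values:

package metrics

import "time"

// exampleUsage is a sketch showing the two ways the collectors are expected to
// be used: a nil *Collectors is a safe no-op, and a real value records into the
// Prometheus vectors created by NewCollectors().
func exampleUsage() {
    var noMetrics *Collectors
    noMetrics.RecordQueueAdd() // nil receiver: returns immediately, no panic

    collectors := NewCollectors()
    collectors.RecordEventReceived("add", "secrets")
    collectors.RecordReconcile("success", 42*time.Millisecond)
    collectors.SetQueueDepth(3)
}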
func NewCollectors() Collectors {
    // Existing metrics (preserved)
    reloaded := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "reload_executed_total",
            Help: "Counter of reloads executed by Reloader.",
        },
        []string{
            "success",
        },
        []string{"success"},
    )

    //set 0 as default value
    reloaded.With(prometheus.Labels{"success": "true"}).Add(0)
    reloaded.With(prometheus.Labels{"success": "false"}).Add(0)

    reloaded_by_namespace := prometheus.NewCounterVec(
    reloadedByNamespace := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "reload_executed_total_by_namespace",
            Help: "Counter of reloads executed by Reloader by namespace.",
        },
        []string{
            "success",
            "namespace",
        []string{"success", "namespace"},
    )

    // === NEW: Comprehensive metrics ===

    reconcileTotal := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "reconcile_total",
            Help: "Total number of reconcile/handler invocations by result.",
        },
        []string{"result"},
    )

    reconcileDuration := prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Namespace: "reloader",
            Name: "reconcile_duration_seconds",
            Help: "Time spent in reconcile/handler in seconds.",
            Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
        },
        []string{"result"},
    )

    actionTotal := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "action_total",
            Help: "Total number of reload actions by workload kind and result.",
        },
        []string{"workload_kind", "result"},
    )

    actionLatency := prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Namespace: "reloader",
            Name: "action_latency_seconds",
            Help: "Time from event received to action applied in seconds.",
            Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60},
        },
        []string{"workload_kind"},
    )

    skippedTotal := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "skipped_total",
            Help: "Total number of skipped operations by reason.",
        },
        []string{"reason"},
    )

    queueDepth := prometheus.NewGauge(
        prometheus.GaugeOpts{
            Namespace: "reloader",
            Name: "workqueue_depth",
            Help: "Current depth of the work queue.",
        },
    )

    queueAdds := prometheus.NewCounter(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "workqueue_adds_total",
            Help: "Total number of items added to the work queue.",
        },
    )

    queueLatency := prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Namespace: "reloader",
            Name: "workqueue_latency_seconds",
            Help: "Time spent in the work queue in seconds.",
            Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
        },
        []string{},
    )

    errorsTotal := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "errors_total",
            Help: "Total number of errors by type.",
        },
        []string{"type"},
    )

    retriesTotal := prometheus.NewCounter(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "retries_total",
            Help: "Total number of retry attempts.",
        },
    )

    eventsReceived := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "events_received_total",
            Help: "Total number of events received by type and resource.",
        },
        []string{"event_type", "resource_type"},
    )

    eventsProcessed := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "events_processed_total",
            Help: "Total number of events processed by type, resource, and result.",
        },
        []string{"event_type", "resource_type", "result"},
    )

    workloadsScanned := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "workloads_scanned_total",
            Help: "Total number of workloads scanned by kind.",
        },
        []string{"kind"},
    )

    workloadsMatched := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Namespace: "reloader",
            Name: "workloads_matched_total",
            Help: "Total number of workloads matched for reload by kind.",
        },
        []string{"kind"},
    )

    return Collectors{
        Reloaded: reloaded,
        ReloadedByNamespace: reloaded_by_namespace,
        ReloadedByNamespace: reloadedByNamespace,
        countByNamespace: os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled",

        ReconcileTotal: reconcileTotal,
        ReconcileDuration: reconcileDuration,
        ActionTotal: actionTotal,
        ActionLatency: actionLatency,
        SkippedTotal: skippedTotal,
        QueueDepth: queueDepth,
        QueueAdds: queueAdds,
        QueueLatency: queueLatency,
        ErrorsTotal: errorsTotal,
        RetriesTotal: retriesTotal,
        EventsReceived: eventsReceived,
        EventsProcessed: eventsProcessed,
        WorkloadsScanned: workloadsScanned,
        WorkloadsMatched: workloadsMatched,
    }
}

func SetupPrometheusEndpoint() Collectors {
    collectors := NewCollectors()

    // Register all metrics
    prometheus.MustRegister(collectors.Reloaded)
    prometheus.MustRegister(collectors.ReconcileTotal)
    prometheus.MustRegister(collectors.ReconcileDuration)
    prometheus.MustRegister(collectors.ActionTotal)
    prometheus.MustRegister(collectors.ActionLatency)
    prometheus.MustRegister(collectors.SkippedTotal)
    prometheus.MustRegister(collectors.QueueDepth)
    prometheus.MustRegister(collectors.QueueAdds)
    prometheus.MustRegister(collectors.QueueLatency)
    prometheus.MustRegister(collectors.ErrorsTotal)
    prometheus.MustRegister(collectors.RetriesTotal)
    prometheus.MustRegister(collectors.EventsReceived)
    prometheus.MustRegister(collectors.EventsProcessed)
    prometheus.MustRegister(collectors.WorkloadsScanned)
    prometheus.MustRegister(collectors.WorkloadsMatched)

    if os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled" {
        prometheus.MustRegister(collectors.ReloadedByNamespace)
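SetupPrometheusEndpoint registers the collectors against the default Prometheus registry; serving them over HTTP is left to the caller. A minimal sketch of that wiring with promhttp, an assumption about how the endpoint is exposed rather than code from this diff (the listen address is illustrative):

package main

import (
    "log"
    "net/http"

    "github.com/prometheus/client_golang/prometheus/promhttp"

    "github.com/stakater/Reloader/internal/pkg/metrics"
)

func main() {
    // Registers all collectors (including the new queue, reconcile, and action
    // metrics) against the default Prometheus registry.
    collectors := metrics.SetupPrometheusEndpoint()
    _ = collectors // handed to the controllers in the real program

    // promhttp.Handler() serves everything registered on the default registry,
    // so the new reloader_* series appear alongside the rest_client_* ones.
    http.Handle("/metrics", promhttp.Handler())
    log.Fatal(http.ListenAndServe(":9090", nil))
}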