diff --git a/.github/workflows/loadtest.yml b/.github/workflows/loadtest.yml
new file mode 100644
index 0000000..a62e3e3
--- /dev/null
+++ b/.github/workflows/loadtest.yml
@@ -0,0 +1,222 @@
+name: Load Test
+
+on:
+ issue_comment:
+ types: [created]
+
+jobs:
+ loadtest:
+ # Only run on PR comments with /loadtest command
+ if: |
+ github.event.issue.pull_request &&
+ contains(github.event.comment.body, '/loadtest')
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Add reaction to comment
+ uses: actions/github-script@v7
+ with:
+ script: |
+ await github.rest.reactions.createForIssueComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: context.payload.comment.id,
+ content: 'rocket'
+ });
+
+ - name: Get PR details
+ id: pr
+ uses: actions/github-script@v7
+ with:
+ script: |
+ const pr = await github.rest.pulls.get({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ pull_number: context.issue.number
+ });
+ core.setOutput('head_ref', pr.data.head.ref);
+ core.setOutput('head_sha', pr.data.head.sha);
+ core.setOutput('base_ref', pr.data.base.ref);
+ core.setOutput('base_sha', pr.data.base.sha);
+ console.log(`PR #${context.issue.number}: ${pr.data.head.ref} -> ${pr.data.base.ref}`);
+
+ - name: Set up Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: '1.23'
+ cache: false
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Install kind
+ run: |
+ curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64
+ chmod +x ./kind
+ sudo mv ./kind /usr/local/bin/kind
+
+ # Build OLD image from base branch (e.g., main)
+ - name: Checkout base branch (old)
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ steps.pr.outputs.base_ref }}
+ path: old
+
+ - name: Build old image
+ run: |
+ cd old
+ docker build -t localhost/reloader:old -f Dockerfile .
+ echo "Built old image from ${{ steps.pr.outputs.base_ref }} (${{ steps.pr.outputs.base_sha }})"
+
+ # Build NEW image from PR branch
+ - name: Checkout PR branch (new)
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ steps.pr.outputs.head_ref }}
+ path: new
+
+ - name: Build new image
+ run: |
+ cd new
+ docker build -t localhost/reloader:new -f Dockerfile .
+ echo "Built new image from ${{ steps.pr.outputs.head_ref }} (${{ steps.pr.outputs.head_sha }})"
+
+ # Build and run loadtest from PR branch
+ - name: Build loadtest tool
+ run: |
+ cd new/test/loadtest
+ go build -o loadtest ./cmd/loadtest
+
+ - name: Run A/B comparison load test
+ id: loadtest
+ run: |
+ cd new/test/loadtest
+ ./loadtest run \
+ --old-image=localhost/reloader:old \
+ --new-image=localhost/reloader:new \
+ --scenario=all \
+ --duration=60 2>&1 | tee loadtest-output.txt
+ echo "exitcode=${PIPESTATUS[0]}" >> $GITHUB_OUTPUT
+
+ - name: Upload results
+ uses: actions/upload-artifact@v4
+ if: always()
+ with:
+ name: loadtest-results
+ path: |
+ new/test/loadtest/results/
+ new/test/loadtest/loadtest-output.txt
+ retention-days: 30
+
+ - name: Post results comment
+ uses: actions/github-script@v7
+ if: always()
+ with:
+ script: |
+ const fs = require('fs');
+
+ let results = '';
+ const resultsDir = 'new/test/loadtest/results';
+
+ // Collect summary of all scenarios
+ let passCount = 0;
+ let failCount = 0;
+ const summaries = [];
+
+ if (fs.existsSync(resultsDir)) {
+ const scenarios = fs.readdirSync(resultsDir).sort();
+ for (const scenario of scenarios) {
+ const reportPath = `${resultsDir}/${scenario}/report.txt`;
+ if (fs.existsSync(reportPath)) {
+ const report = fs.readFileSync(reportPath, 'utf8');
+
+ // Extract status from report
+ const statusMatch = report.match(/Status:\s+(PASS|FAIL)/);
+ const status = statusMatch ? statusMatch[1] : 'UNKNOWN';
+
+ if (status === 'PASS') passCount++;
+ else failCount++;
+
+ // Extract key metrics for summary
+ const actionMatch = report.match(/action_total\s+[\d.]+\s+[\d.]+\s+[\d.]+/);
+ const errorsMatch = report.match(/errors_total\s+[\d.]+\s+[\d.]+/);
+
+ summaries.push(`| ${scenario} | ${status === 'PASS' ? '✅' : '❌'} ${status} |`);
+
+            results += `\n\n${status === 'PASS' ? '✅' : '❌'} ${scenario}\n\n\`\`\`\n${report}\n\`\`\`\n\n`;
+ }
+ }
+ }
+
+ if (!results) {
+ // Read raw output if no reports
+ if (fs.existsSync('new/test/loadtest/loadtest-output.txt')) {
+ const output = fs.readFileSync('new/test/loadtest/loadtest-output.txt', 'utf8');
+ const maxLen = 60000;
+ results = output.length > maxLen
+ ? output.substring(output.length - maxLen)
+ : output;
+ results = `\`\`\`\n${results}\n\`\`\``;
+ } else {
+ results = 'No results available';
+ }
+ }
+
+ const overallStatus = failCount === 0 ? '✅ ALL PASSED' : `❌ ${failCount} FAILED`;
+
+ const body = `## Load Test Results ${overallStatus}
+
+ **Comparing:** \`${{ steps.pr.outputs.base_ref }}\` (old) vs \`${{ steps.pr.outputs.head_ref }}\` (new)
+ **Old commit:** ${{ steps.pr.outputs.base_sha }}
+ **New commit:** ${{ steps.pr.outputs.head_sha }}
+ **Triggered by:** @${{ github.event.comment.user.login }}
+
+ ### Summary
+
+ | Scenario | Status |
+ |----------|--------|
+ ${summaries.join('\n')}
+
+ **Total:** ${passCount} passed, ${failCount} failed
+
+ ### Detailed Results
+
+ ${results}
+
+
+ 📦 Download full results
+
+ Artifacts are available in the [workflow run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+ `;
+
+ await github.rest.issues.createComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ issue_number: context.issue.number,
+ body: body
+ });
+
+ - name: Add success reaction
+ if: success()
+ uses: actions/github-script@v7
+ with:
+ script: |
+ await github.rest.reactions.createForIssueComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: context.payload.comment.id,
+ content: '+1'
+ });
+
+ - name: Add failure reaction
+ if: failure()
+ uses: actions/github-script@v7
+ with:
+ script: |
+ await github.rest.reactions.createForIssueComment({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ comment_id: context.payload.comment.id,
+ content: '-1'
+ });
diff --git a/internal/pkg/controller/controller.go b/internal/pkg/controller/controller.go
index 15b2e0f..7dd3f5e 100644
--- a/internal/pkg/controller/controller.go
+++ b/internal/pkg/controller/controller.go
@@ -103,6 +103,8 @@ func NewController(
// Add function to add a new object to the queue in case of creating a resource
func (c *Controller) Add(obj interface{}) {
+ // Record event received
+ c.collectors.RecordEventReceived("add", c.resource)
switch object := obj.(type) {
case *v1.Namespace:
@@ -112,11 +114,14 @@ func (c *Controller) Add(obj interface{}) {
if options.ReloadOnCreate == "true" {
if !c.resourceInIgnoredNamespace(obj) && c.resourceInSelectedNamespaces(obj) && secretControllerInitialized && configmapControllerInitialized {
- c.queue.Add(handler.ResourceCreatedHandler{
- Resource: obj,
- Collectors: c.collectors,
- Recorder: c.recorder,
+ c.enqueue(handler.ResourceCreatedHandler{
+ Resource: obj,
+ Collectors: c.collectors,
+ Recorder: c.recorder,
+ EnqueueTime: time.Now(), // Track when item was enqueued
})
+ } else {
+ c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
}
@@ -166,31 +171,42 @@ func (c *Controller) removeSelectedNamespaceFromCache(namespace v1.Namespace) {
// Update function to add an old object and a new object to the queue in case of updating a resource
func (c *Controller) Update(old interface{}, new interface{}) {
+ // Record event received
+ c.collectors.RecordEventReceived("update", c.resource)
+
switch new.(type) {
case *v1.Namespace:
return
}
if !c.resourceInIgnoredNamespace(new) && c.resourceInSelectedNamespaces(new) {
- c.queue.Add(handler.ResourceUpdatedHandler{
+ c.enqueue(handler.ResourceUpdatedHandler{
Resource: new,
OldResource: old,
Collectors: c.collectors,
Recorder: c.recorder,
+ EnqueueTime: time.Now(), // Track when item was enqueued
})
+ } else {
+ c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
// Delete function to add an object to the queue in case of deleting a resource
func (c *Controller) Delete(old interface{}) {
+ // Record event received
+ c.collectors.RecordEventReceived("delete", c.resource)
if options.ReloadOnDelete == "true" {
if !c.resourceInIgnoredNamespace(old) && c.resourceInSelectedNamespaces(old) && secretControllerInitialized && configmapControllerInitialized {
- c.queue.Add(handler.ResourceDeleteHandler{
- Resource: old,
- Collectors: c.collectors,
- Recorder: c.recorder,
+ c.enqueue(handler.ResourceDeleteHandler{
+ Resource: old,
+ Collectors: c.collectors,
+ Recorder: c.recorder,
+ EnqueueTime: time.Now(), // Track when item was enqueued
})
+ } else {
+ c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
@@ -201,6 +217,13 @@ func (c *Controller) Delete(old interface{}) {
}
}
+// enqueue adds an item to the queue and records metrics
+func (c *Controller) enqueue(item interface{}) {
+ c.queue.Add(item)
+ c.collectors.RecordQueueAdd()
+ c.collectors.SetQueueDepth(c.queue.Len())
+}
+
// Run function for controller which handles the queue
func (c *Controller) Run(threadiness int, stopCh chan struct{}) {
defer runtime.HandleCrash()
@@ -242,13 +265,36 @@ func (c *Controller) processNextItem() bool {
if quit {
return false
}
+
+ // Update queue depth after getting item
+ c.collectors.SetQueueDepth(c.queue.Len())
+
// Tell the queue that we are done with processing this key. This unblocks the key for other workers
// This allows safe parallel processing because two events with the same key are never processed in
// parallel.
defer c.queue.Done(resourceHandler)
+ // Record queue latency if the handler supports it
+ if h, ok := resourceHandler.(handler.TimedHandler); ok {
+ queueLatency := time.Since(h.GetEnqueueTime())
+ c.collectors.RecordQueueLatency(queueLatency)
+ }
+
+ // Track reconcile/handler duration
+ startTime := time.Now()
+
// Invoke the method containing the business logic
err := resourceHandler.(handler.ResourceHandler).Handle()
+
+ duration := time.Since(startTime)
+
+ // Record reconcile metrics
+ if err != nil {
+ c.collectors.RecordReconcile("error", duration)
+ } else {
+ c.collectors.RecordReconcile("success", duration)
+ }
+
// Handle the error if something went wrong during the execution of the business logic
c.handleErr(err, resourceHandler)
return true
@@ -261,16 +307,26 @@ func (c *Controller) handleErr(err error, key interface{}) {
// This ensures that future processing of updates for this key is not delayed because of
// an outdated error history.
c.queue.Forget(key)
+
+ // Record successful event processing
+ c.collectors.RecordEventProcessed("unknown", c.resource, "success")
return
}
+ // Record error
+ c.collectors.RecordError("handler_error")
+
// This controller retries 5 times if something goes wrong. After that, it stops trying.
if c.queue.NumRequeues(key) < 5 {
logrus.Errorf("Error syncing events: %v", err)
+ // Record retry
+ c.collectors.RecordRetry()
+
// Re-enqueue the key rate limited. Based on the rate limiter on the
// queue and the re-enqueue history, the key will be processed later again.
c.queue.AddRateLimited(key)
+ c.collectors.SetQueueDepth(c.queue.Len())
return
}
@@ -279,4 +335,7 @@ func (c *Controller) handleErr(err error, key interface{}) {
runtime.HandleError(err)
logrus.Errorf("Dropping key out of the queue: %v", err)
logrus.Debugf("Dropping the key %q out of the queue: %v", key, err)
+
+ // Record failed event processing
+ c.collectors.RecordEventProcessed("unknown", c.resource, "dropped")
}
diff --git a/internal/pkg/controller/controller_test.go b/internal/pkg/controller/controller_test.go
index 63e6be3..7b4c728 100644
--- a/internal/pkg/controller/controller_test.go
+++ b/internal/pkg/controller/controller_test.go
@@ -2157,19 +2157,21 @@ func TestController_resourceInIgnoredNamespace(t *testing.T) {
},
}
for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- c := &Controller{
- client: tt.fields.client,
- indexer: tt.fields.indexer,
- queue: tt.fields.queue,
- informer: tt.fields.informer,
- namespace: tt.fields.namespace,
- ignoredNamespaces: tt.fields.ignoredNamespaces,
- }
- if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
- t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
- }
- })
+ t.Run(
+ tt.name, func(t *testing.T) {
+ c := &Controller{
+ client: tt.fields.client,
+ indexer: tt.fields.indexer,
+ queue: tt.fields.queue,
+ informer: tt.fields.informer,
+ namespace: tt.fields.namespace,
+ ignoredNamespaces: tt.fields.ignoredNamespaces,
+ }
+ if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
+ t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
+ }
+ },
+ )
}
}
@@ -2331,35 +2333,37 @@ func TestController_resourceInNamespaceSelector(t *testing.T) {
}
for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- fakeClient := fake.NewSimpleClientset()
- namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
- logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)
+ t.Run(
+ tt.name, func(t *testing.T) {
+ fakeClient := fake.NewSimpleClientset()
+ namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
+ logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)
- c := &Controller{
- client: fakeClient,
- indexer: tt.fields.indexer,
- queue: tt.fields.queue,
- informer: tt.fields.informer,
- namespace: tt.fields.namespace.Name,
- namespaceSelector: tt.fields.namespaceSelector,
- }
+ c := &Controller{
+ client: fakeClient,
+ indexer: tt.fields.indexer,
+ queue: tt.fields.queue,
+ informer: tt.fields.informer,
+ namespace: tt.fields.namespace.Name,
+ namespaceSelector: tt.fields.namespaceSelector,
+ }
- listOptions := metav1.ListOptions{}
- listOptions.LabelSelector = tt.fields.namespaceSelector
- namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)
+ listOptions := metav1.ListOptions{}
+ listOptions.LabelSelector = tt.fields.namespaceSelector
+ namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)
- for _, ns := range namespaces.Items {
- c.addSelectedNamespaceToCache(ns)
- }
+ for _, ns := range namespaces.Items {
+ c.addSelectedNamespaceToCache(ns)
+ }
- if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
- t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
- }
+ if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
+ t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
+ }
- for _, ns := range namespaces.Items {
- c.removeSelectedNamespaceFromCache(ns)
- }
- })
+ for _, ns := range namespaces.Items {
+ c.removeSelectedNamespaceFromCache(ns)
+ }
+ },
+ )
}
}
diff --git a/internal/pkg/handler/create.go b/internal/pkg/handler/create.go
index fab7378..5fd3014 100644
--- a/internal/pkg/handler/create.go
+++ b/internal/pkg/handler/create.go
@@ -1,6 +1,8 @@
package handler
import (
+ "time"
+
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/metrics"
"github.com/stakater/Reloader/internal/pkg/options"
@@ -11,23 +13,45 @@ import (
// ResourceCreatedHandler contains new objects
type ResourceCreatedHandler struct {
- Resource interface{}
- Collectors metrics.Collectors
- Recorder record.EventRecorder
+ Resource interface{}
+ Collectors metrics.Collectors
+ Recorder record.EventRecorder
+ EnqueueTime time.Time // Time when this handler was added to the queue
+}
+
+// GetEnqueueTime returns when this handler was enqueued
+func (r ResourceCreatedHandler) GetEnqueueTime() time.Time {
+ return r.EnqueueTime
}
// Handle processes the newly created resource
func (r ResourceCreatedHandler) Handle() error {
+ startTime := time.Now()
+ result := "success"
+
+ defer func() {
+ r.Collectors.RecordReconcile(result, time.Since(startTime))
+ }()
+
if r.Resource == nil {
logrus.Errorf("Resource creation handler received nil resource")
+ result = "error"
} else {
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
- return sendUpgradeWebhook(config, options.WebhookUrl)
+ err := sendUpgradeWebhook(config, options.WebhookUrl)
+ if err != nil {
+ result = "error"
+ }
+ return err
}
// process resource based on its type
- return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
+ err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
+ if err != nil {
+ result = "error"
+ }
+ return err
}
return nil
}
diff --git a/internal/pkg/handler/delete.go b/internal/pkg/handler/delete.go
index 65c671e..243602c 100644
--- a/internal/pkg/handler/delete.go
+++ b/internal/pkg/handler/delete.go
@@ -3,6 +3,7 @@ package handler
import (
"fmt"
"slices"
+ "time"
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/callbacks"
@@ -20,23 +21,45 @@ import (
// ResourceDeleteHandler contains new objects
type ResourceDeleteHandler struct {
- Resource interface{}
- Collectors metrics.Collectors
- Recorder record.EventRecorder
+ Resource interface{}
+ Collectors metrics.Collectors
+ Recorder record.EventRecorder
+ EnqueueTime time.Time // Time when this handler was added to the queue
+}
+
+// GetEnqueueTime returns when this handler was enqueued
+func (r ResourceDeleteHandler) GetEnqueueTime() time.Time {
+ return r.EnqueueTime
}
// Handle processes resources being deleted
func (r ResourceDeleteHandler) Handle() error {
+ startTime := time.Now()
+ result := "success"
+
+ defer func() {
+ r.Collectors.RecordReconcile(result, time.Since(startTime))
+ }()
+
if r.Resource == nil {
logrus.Errorf("Resource delete handler received nil resource")
+ result = "error"
} else {
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
- return sendUpgradeWebhook(config, options.WebhookUrl)
+ err := sendUpgradeWebhook(config, options.WebhookUrl)
+ if err != nil {
+ result = "error"
+ }
+ return err
}
// process resource based on its type
- return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
+ err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
+ if err != nil {
+ result = "error"
+ }
+ return err
}
return nil
}
diff --git a/internal/pkg/handler/handler.go b/internal/pkg/handler/handler.go
index 1f5858e..9018f80 100644
--- a/internal/pkg/handler/handler.go
+++ b/internal/pkg/handler/handler.go
@@ -1,9 +1,18 @@
package handler
-import "github.com/stakater/Reloader/pkg/common"
+import (
+ "time"
+
+ "github.com/stakater/Reloader/pkg/common"
+)
// ResourceHandler handles the creation and update of resources
type ResourceHandler interface {
Handle() error
GetConfig() (common.Config, string)
}
+
+// TimedHandler is a handler that tracks when it was enqueued
+type TimedHandler interface {
+ GetEnqueueTime() time.Time
+}
diff --git a/internal/pkg/handler/update.go b/internal/pkg/handler/update.go
index ae0bb1e..3ae1080 100644
--- a/internal/pkg/handler/update.go
+++ b/internal/pkg/handler/update.go
@@ -1,6 +1,8 @@
package handler
import (
+ "time"
+
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/metrics"
"github.com/stakater/Reloader/internal/pkg/options"
@@ -16,21 +18,47 @@ type ResourceUpdatedHandler struct {
OldResource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
+ EnqueueTime time.Time // Time when this handler was added to the queue
+}
+
+// GetEnqueueTime returns when this handler was enqueued
+func (r ResourceUpdatedHandler) GetEnqueueTime() time.Time {
+ return r.EnqueueTime
}
// Handle processes the updated resource
func (r ResourceUpdatedHandler) Handle() error {
+ startTime := time.Now()
+ result := "success"
+
+ defer func() {
+ r.Collectors.RecordReconcile(result, time.Since(startTime))
+ }()
+
if r.Resource == nil || r.OldResource == nil {
logrus.Errorf("Resource update handler received nil resource")
+ result = "error"
} else {
config, oldSHAData := r.GetConfig()
if config.SHAValue != oldSHAData {
// Send a webhook if update
if options.WebhookUrl != "" {
- return sendUpgradeWebhook(config, options.WebhookUrl)
+ err := sendUpgradeWebhook(config, options.WebhookUrl)
+ if err != nil {
+ result = "error"
+ }
+ return err
}
// process resource based on its type
- return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
+ err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
+ if err != nil {
+ result = "error"
+ }
+ return err
+ } else {
+ // No data change - skip
+ result = "skipped"
+ r.Collectors.RecordSkipped("no_data_change")
}
}
return nil
diff --git a/internal/pkg/handler/upgrade.go b/internal/pkg/handler/upgrade.go
index 6f185f1..e355d5f 100644
--- a/internal/pkg/handler/upgrade.go
+++ b/internal/pkg/handler/upgrade.go
@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"os"
+ "time"
"github.com/parnurzeal/gorequest"
"github.com/prometheus/client_golang/prometheus"
@@ -236,23 +237,34 @@ func rollingUpgrade(clients kube.Clients, config common.Config, upgradeFuncs cal
func PerformAction(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy) error {
items := upgradeFuncs.ItemsFunc(clients, config.Namespace)
+ // Record workloads scanned
+ collectors.RecordWorkloadsScanned(upgradeFuncs.ResourceType, len(items))
+
+ matchedCount := 0
for _, item := range items {
- err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) error {
- return upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource)
+ err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) (bool, error) {
+ matched, err := upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource)
+ if matched {
+ matchedCount++
+ }
+ return matched, err
})
if err != nil {
return err
}
}
+ // Record workloads matched
+ collectors.RecordWorkloadsMatched(upgradeFuncs.ResourceType, matchedCount)
+
return nil
}
-func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
+func retryOnConflict(backoff wait.Backoff, fn func(_ bool) (bool, error)) error {
var lastError error
fetchResource := false // do not fetch resource on first attempt, already done by ItemsFunc
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
- err := fn(fetchResource)
+ _, err := fn(fetchResource)
fetchResource = true
switch {
case err == nil:
@@ -270,17 +282,19 @@ func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
return err
}
-func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) error {
+func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) (bool, error) {
+ actionStartTime := time.Now()
+
accessor, err := meta.Accessor(resource)
if err != nil {
- return err
+ return false, err
}
resourceName := accessor.GetName()
if fetchResource {
resource, err = upgradeFuncs.ItemFunc(clients, resourceName, config.Namespace)
if err != nil {
- return err
+ return false, err
}
}
annotations := upgradeFuncs.AnnotationsFunc(resource)
@@ -289,13 +303,14 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
if !result.ShouldReload {
logrus.Debugf("No changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
- return nil
+ return false, nil
}
strategyResult := strategy(upgradeFuncs, resource, config, result.AutoReload)
if strategyResult.Result != constants.Updated {
- return nil
+ collectors.RecordSkipped("strategy_not_updated")
+ return false, nil
}
// find correct annotation and update the resource
@@ -309,7 +324,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
_, err = PauseDeployment(deployment, clients, config.Namespace, pauseInterval)
if err != nil {
logrus.Errorf("Failed to pause deployment '%s' in namespace '%s': %v", resourceName, config.Namespace, err)
- return err
+ return true, err
}
}
}
@@ -320,16 +335,19 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
err = upgradeFuncs.UpdateFunc(clients, config.Namespace, resource)
}
+ actionLatency := time.Since(actionStartTime)
+
if err != nil {
message := fmt.Sprintf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)
logrus.Errorf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)
collectors.Reloaded.With(prometheus.Labels{"success": "false"}).Inc()
collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "false", "namespace": config.Namespace}).Inc()
+ collectors.RecordAction(upgradeFuncs.ResourceType, "error", actionLatency)
if recorder != nil {
recorder.Event(resource, v1.EventTypeWarning, "ReloadFail", message)
}
- return err
+ return true, err
} else {
message := fmt.Sprintf("Changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
message += fmt.Sprintf(", Updated '%s' of type '%s' in namespace '%s'", resourceName, upgradeFuncs.ResourceType, config.Namespace)
@@ -338,6 +356,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
collectors.Reloaded.With(prometheus.Labels{"success": "true"}).Inc()
collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "true", "namespace": config.Namespace}).Inc()
+ collectors.RecordAction(upgradeFuncs.ResourceType, "success", actionLatency)
alert_on_reload, ok := os.LookupEnv("ALERT_ON_RELOAD")
if recorder != nil {
recorder.Event(resource, v1.EventTypeNormal, "Reloaded", message)
@@ -350,7 +369,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
}
}
- return nil
+ return true, nil
}
func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string) string {
diff --git a/internal/pkg/metrics/prometheus.go b/internal/pkg/metrics/prometheus.go
index 94153ea..e6f2f35 100644
--- a/internal/pkg/metrics/prometheus.go
+++ b/internal/pkg/metrics/prometheus.go
@@ -1,54 +1,407 @@
package metrics
import (
+ "context"
"net/http"
+ "net/url"
"os"
+ "time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
+ "k8s.io/client-go/tools/metrics"
)
+// clientGoRequestMetrics implements metrics.LatencyMetric and metrics.ResultMetric
+// to expose client-go's rest_client_requests_total and rest_client_request_duration_seconds metrics
+type clientGoRequestMetrics struct {
+ requestCounter *prometheus.CounterVec
+ requestLatency *prometheus.HistogramVec
+}
+
+func (m *clientGoRequestMetrics) Increment(ctx context.Context, code string, method string, host string) {
+ m.requestCounter.WithLabelValues(code, method, host).Inc()
+}
+
+func (m *clientGoRequestMetrics) Observe(ctx context.Context, verb string, u url.URL, latency time.Duration) {
+ m.requestLatency.WithLabelValues(verb, u.Host).Observe(latency.Seconds())
+}
+
+var clientGoMetrics = &clientGoRequestMetrics{
+ requestCounter: prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Name: "rest_client_requests_total",
+ Help: "Number of HTTP requests, partitioned by status code, method, and host.",
+ },
+ []string{"code", "method", "host"},
+ ),
+ requestLatency: prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Name: "rest_client_request_duration_seconds",
+ Help: "Request latency in seconds. Broken down by verb and host.",
+ Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30},
+ },
+ []string{"verb", "host"},
+ ),
+}
+
+func init() {
+ // Register the metrics collectors
+ prometheus.MustRegister(clientGoMetrics.requestCounter)
+ prometheus.MustRegister(clientGoMetrics.requestLatency)
+
+ // Register our metrics implementation with client-go
+ metrics.RequestResult = clientGoMetrics
+ metrics.RequestLatency = clientGoMetrics
+}
+
+// Collectors holds all Prometheus metrics collectors for Reloader.
type Collectors struct {
+ // Existing metrics (preserved for backward compatibility)
Reloaded *prometheus.CounterVec
ReloadedByNamespace *prometheus.CounterVec
+ countByNamespace bool
+
+ // Reconcile/Handler metrics
+ ReconcileTotal *prometheus.CounterVec // Total reconcile calls by result
+ ReconcileDuration *prometheus.HistogramVec // Time spent in reconcile/handler
+
+ // Action metrics
+ ActionTotal *prometheus.CounterVec // Total actions by workload kind and result
+ ActionLatency *prometheus.HistogramVec // Time from event to action applied
+
+ // Skip metrics
+ SkippedTotal *prometheus.CounterVec // Skipped operations by reason
+
+ // Queue metrics
+ QueueDepth prometheus.Gauge // Current queue depth
+ QueueAdds prometheus.Counter // Total items added to queue
+ QueueLatency *prometheus.HistogramVec // Time spent in queue
+
+ // Error and retry metrics
+ ErrorsTotal *prometheus.CounterVec // Errors by type
+ RetriesTotal prometheus.Counter // Total retries
+
+ // Event processing metrics
+ EventsReceived *prometheus.CounterVec // Events received by type (add/update/delete)
+ EventsProcessed *prometheus.CounterVec // Events processed by type and result
+
+ // Resource discovery metrics
+ WorkloadsScanned *prometheus.CounterVec // Workloads scanned by kind
+ WorkloadsMatched *prometheus.CounterVec // Workloads matched for reload by kind
+}
+
+// RecordReload records a reload event with the given success status and namespace.
+// Preserved for backward compatibility.
+func (c *Collectors) RecordReload(success bool, namespace string) {
+ if c == nil {
+ return
+ }
+
+ successLabel := "false"
+ if success {
+ successLabel = "true"
+ }
+
+ c.Reloaded.With(prometheus.Labels{"success": successLabel}).Inc()
+
+ if c.countByNamespace {
+ c.ReloadedByNamespace.With(prometheus.Labels{
+ "success": successLabel,
+ "namespace": namespace,
+ }).Inc()
+ }
+}
+
+// RecordReconcile records a reconcile/handler invocation.
+func (c *Collectors) RecordReconcile(result string, duration time.Duration) {
+ if c == nil {
+ return
+ }
+ c.ReconcileTotal.With(prometheus.Labels{"result": result}).Inc()
+ c.ReconcileDuration.With(prometheus.Labels{"result": result}).Observe(duration.Seconds())
+}
+
+// RecordAction records a reload action on a workload.
+func (c *Collectors) RecordAction(workloadKind string, result string, latency time.Duration) {
+ if c == nil {
+ return
+ }
+ c.ActionTotal.With(prometheus.Labels{"workload_kind": workloadKind, "result": result}).Inc()
+ c.ActionLatency.With(prometheus.Labels{"workload_kind": workloadKind}).Observe(latency.Seconds())
+}
+
+// RecordSkipped records a skipped operation with reason.
+func (c *Collectors) RecordSkipped(reason string) {
+ if c == nil {
+ return
+ }
+ c.SkippedTotal.With(prometheus.Labels{"reason": reason}).Inc()
+}
+
+// RecordQueueAdd records an item being added to the queue.
+func (c *Collectors) RecordQueueAdd() {
+ if c == nil {
+ return
+ }
+ c.QueueAdds.Inc()
+}
+
+// SetQueueDepth sets the current queue depth.
+func (c *Collectors) SetQueueDepth(depth int) {
+ if c == nil {
+ return
+ }
+ c.QueueDepth.Set(float64(depth))
+}
+
+// RecordQueueLatency records how long an item spent in the queue.
+func (c *Collectors) RecordQueueLatency(latency time.Duration) {
+ if c == nil {
+ return
+ }
+ c.QueueLatency.With(prometheus.Labels{}).Observe(latency.Seconds())
+}
+
+// RecordError records an error by type.
+func (c *Collectors) RecordError(errorType string) {
+ if c == nil {
+ return
+ }
+ c.ErrorsTotal.With(prometheus.Labels{"type": errorType}).Inc()
+}
+
+// RecordRetry records a retry attempt.
+func (c *Collectors) RecordRetry() {
+ if c == nil {
+ return
+ }
+ c.RetriesTotal.Inc()
+}
+
+// RecordEventReceived records an event being received.
+func (c *Collectors) RecordEventReceived(eventType string, resourceType string) {
+ if c == nil {
+ return
+ }
+ c.EventsReceived.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType}).Inc()
+}
+
+// RecordEventProcessed records an event being processed.
+func (c *Collectors) RecordEventProcessed(eventType string, resourceType string, result string) {
+ if c == nil {
+ return
+ }
+ c.EventsProcessed.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType, "result": result}).Inc()
+}
+
+// RecordWorkloadsScanned records workloads scanned during a reconcile.
+func (c *Collectors) RecordWorkloadsScanned(kind string, count int) {
+ if c == nil {
+ return
+ }
+ c.WorkloadsScanned.With(prometheus.Labels{"kind": kind}).Add(float64(count))
+}
+
+// RecordWorkloadsMatched records workloads matched for reload.
+func (c *Collectors) RecordWorkloadsMatched(kind string, count int) {
+ if c == nil {
+ return
+ }
+ c.WorkloadsMatched.With(prometheus.Labels{"kind": kind}).Add(float64(count))
}
func NewCollectors() Collectors {
+ // Existing metrics (preserved)
reloaded := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "reload_executed_total",
Help: "Counter of reloads executed by Reloader.",
},
- []string{
- "success",
- },
+ []string{"success"},
)
-
- //set 0 as default value
reloaded.With(prometheus.Labels{"success": "true"}).Add(0)
reloaded.With(prometheus.Labels{"success": "false"}).Add(0)
- reloaded_by_namespace := prometheus.NewCounterVec(
+ reloadedByNamespace := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "reload_executed_total_by_namespace",
Help: "Counter of reloads executed by Reloader by namespace.",
},
- []string{
- "success",
- "namespace",
+ []string{"success", "namespace"},
+ )
+
+ // === NEW: Comprehensive metrics ===
+
+ reconcileTotal := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "reconcile_total",
+ Help: "Total number of reconcile/handler invocations by result.",
+ },
+ []string{"result"},
+ )
+
+ reconcileDuration := prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "reloader",
+ Name: "reconcile_duration_seconds",
+ Help: "Time spent in reconcile/handler in seconds.",
+ Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
+ },
+ []string{"result"},
+ )
+
+ actionTotal := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "action_total",
+ Help: "Total number of reload actions by workload kind and result.",
+ },
+ []string{"workload_kind", "result"},
+ )
+
+ actionLatency := prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "reloader",
+ Name: "action_latency_seconds",
+ Help: "Time from event received to action applied in seconds.",
+ Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60},
+ },
+ []string{"workload_kind"},
+ )
+
+ skippedTotal := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "skipped_total",
+ Help: "Total number of skipped operations by reason.",
+ },
+ []string{"reason"},
+ )
+
+ queueDepth := prometheus.NewGauge(
+ prometheus.GaugeOpts{
+ Namespace: "reloader",
+ Name: "workqueue_depth",
+ Help: "Current depth of the work queue.",
},
)
+
+ queueAdds := prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "workqueue_adds_total",
+ Help: "Total number of items added to the work queue.",
+ },
+ )
+
+ queueLatency := prometheus.NewHistogramVec(
+ prometheus.HistogramOpts{
+ Namespace: "reloader",
+ Name: "workqueue_latency_seconds",
+ Help: "Time spent in the work queue in seconds.",
+ Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
+ },
+ []string{},
+ )
+
+ errorsTotal := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "errors_total",
+ Help: "Total number of errors by type.",
+ },
+ []string{"type"},
+ )
+
+ retriesTotal := prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "retries_total",
+ Help: "Total number of retry attempts.",
+ },
+ )
+
+ eventsReceived := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "events_received_total",
+ Help: "Total number of events received by type and resource.",
+ },
+ []string{"event_type", "resource_type"},
+ )
+
+ eventsProcessed := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "events_processed_total",
+ Help: "Total number of events processed by type, resource, and result.",
+ },
+ []string{"event_type", "resource_type", "result"},
+ )
+
+ workloadsScanned := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "workloads_scanned_total",
+ Help: "Total number of workloads scanned by kind.",
+ },
+ []string{"kind"},
+ )
+
+ workloadsMatched := prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: "reloader",
+ Name: "workloads_matched_total",
+ Help: "Total number of workloads matched for reload by kind.",
+ },
+ []string{"kind"},
+ )
+
return Collectors{
Reloaded: reloaded,
- ReloadedByNamespace: reloaded_by_namespace,
+ ReloadedByNamespace: reloadedByNamespace,
+ countByNamespace: os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled",
+
+ ReconcileTotal: reconcileTotal,
+ ReconcileDuration: reconcileDuration,
+ ActionTotal: actionTotal,
+ ActionLatency: actionLatency,
+ SkippedTotal: skippedTotal,
+ QueueDepth: queueDepth,
+ QueueAdds: queueAdds,
+ QueueLatency: queueLatency,
+ ErrorsTotal: errorsTotal,
+ RetriesTotal: retriesTotal,
+ EventsReceived: eventsReceived,
+ EventsProcessed: eventsProcessed,
+ WorkloadsScanned: workloadsScanned,
+ WorkloadsMatched: workloadsMatched,
}
}
func SetupPrometheusEndpoint() Collectors {
collectors := NewCollectors()
+
+ // Register all metrics
prometheus.MustRegister(collectors.Reloaded)
+ prometheus.MustRegister(collectors.ReconcileTotal)
+ prometheus.MustRegister(collectors.ReconcileDuration)
+ prometheus.MustRegister(collectors.ActionTotal)
+ prometheus.MustRegister(collectors.ActionLatency)
+ prometheus.MustRegister(collectors.SkippedTotal)
+ prometheus.MustRegister(collectors.QueueDepth)
+ prometheus.MustRegister(collectors.QueueAdds)
+ prometheus.MustRegister(collectors.QueueLatency)
+ prometheus.MustRegister(collectors.ErrorsTotal)
+ prometheus.MustRegister(collectors.RetriesTotal)
+ prometheus.MustRegister(collectors.EventsReceived)
+ prometheus.MustRegister(collectors.EventsProcessed)
+ prometheus.MustRegister(collectors.WorkloadsScanned)
+ prometheus.MustRegister(collectors.WorkloadsMatched)
if os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled" {
prometheus.MustRegister(collectors.ReloadedByNamespace)
diff --git a/test/loadtest/README.md b/test/loadtest/README.md
new file mode 100644
index 0000000..7182bb3
--- /dev/null
+++ b/test/loadtest/README.md
@@ -0,0 +1,544 @@
+# Reloader Load Test Framework
+
+This framework provides A/B comparison testing between two Reloader container images.
+
+## Overview
+
+The load test framework:
+1. Creates a local kind cluster (1 control-plane + 6 worker nodes)
+2. Deploys Prometheus for metrics collection
+3. Loads the provided Reloader container images into the cluster
+4. Runs standardized test scenarios (S1-S13)
+5. Collects metrics via Prometheus scraping
+6. Generates comparison reports with pass/fail criteria
+
+## Prerequisites
+
+- Docker or Podman
+- kind (Kubernetes in Docker)
+- kubectl
+- Go 1.22+
+
+## Building
+
+```bash
+cd test/loadtest
+go build -o loadtest ./cmd/loadtest
+```
+
+## Quick Start
+
+```bash
+# Compare two published images (e.g., different versions)
+./loadtest run \
+ --old-image=stakater/reloader:v1.0.0 \
+ --new-image=stakater/reloader:v1.1.0
+
+# Run a specific scenario
+./loadtest run \
+ --old-image=stakater/reloader:v1.0.0 \
+ --new-image=stakater/reloader:v1.1.0 \
+ --scenario=S2 \
+ --duration=120
+
+# Test only a single image (no comparison)
+./loadtest run --new-image=myregistry/reloader:dev
+
+# Use local images built with docker/podman
+./loadtest run \
+ --old-image=localhost/reloader:baseline \
+ --new-image=localhost/reloader:feature-branch
+
+# Skip cluster creation (use existing kind cluster)
+./loadtest run \
+ --old-image=stakater/reloader:v1.0.0 \
+ --new-image=stakater/reloader:v1.1.0 \
+ --skip-cluster
+
+# Run all scenarios in parallel on 4 clusters (faster execution)
+./loadtest run \
+ --new-image=localhost/reloader:dev \
+ --parallelism=4
+
+# Run all 13 scenarios in parallel (one cluster per scenario)
+./loadtest run \
+ --new-image=localhost/reloader:dev \
+ --parallelism=13
+
+# Generate report from existing results
+./loadtest report --scenario=S2 --results-dir=./results
+```
+
+## Command Line Options
+
+### Run Command
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `--old-image=IMAGE` | Container image for "old" version | - |
+| `--new-image=IMAGE` | Container image for "new" version | - |
+| `--scenario=ID` | Test scenario: S1-S13 or "all" | all |
+| `--duration=SECONDS` | Test duration in seconds | 60 |
+| `--parallelism=N` | Run N scenarios in parallel on N kind clusters | 1 |
+| `--skip-cluster` | Skip kind cluster creation (use existing, only for parallelism=1) | false |
+| `--results-dir=DIR` | Directory for results | ./results |
+
+**Note:** At least one of `--old-image` or `--new-image` is required. Provide both for A/B comparison.
+
+### Report Command
+
+| Option | Description | Default |
+|--------|-------------|---------|
+| `--scenario=ID` | Scenario to report on (required) | - |
+| `--results-dir=DIR` | Directory containing results | ./results |
+| `--output=FILE` | Output file (default: stdout) | - |
+
+## Test Scenarios
+
+| ID | Name | Description |
+|-----|-----------------------|-------------------------------------------------|
+| S1 | Burst Updates | Many ConfigMap/Secret updates in quick succession |
+| S2 | Fan-Out | One ConfigMap used by many (50) workloads |
+| S3 | High Cardinality | Many CMs/Secrets across many namespaces |
+| S4  | No-Op Updates         | Updates that don't change data (annotation only) |
+| S5 | Workload Churn | Deployments created/deleted rapidly |
+| S6 | Controller Restart | Restart controller pod under load |
+| S7 | API Pressure | Many concurrent update requests |
+| S8 | Large Objects | ConfigMaps > 100KB |
+| S9 | Multi-Workload Types | Tests all workload types (Deploy, STS, DS) |
+| S10 | Secrets + Mixed | Secrets and mixed ConfigMap+Secret workloads |
+| S11 | Annotation Strategy | Tests `--reload-strategy=annotations` |
+| S12 | Pause & Resume | Tests pause-period during rapid updates |
+| S13 | Complex References | Init containers, valueFrom, projected volumes |
+
+## Metrics Reference
+
+This section explains each metric collected during load tests, what it measures, and what different values might indicate.
+
+### Counter Metrics (Totals)
+
+#### `reconcile_total`
+**What it measures:** The total number of reconciliation loops executed by the controller.
+
+**What it indicates:**
+- **Higher in new vs old:** The new controller-runtime implementation may batch events differently. This is often expected behavior, not a problem.
+- **Lower in new vs old:** Better event batching/deduplication. Controller-runtime's work queue naturally deduplicates events.
+- **Expected behavior:** The new implementation typically has *fewer* reconciles due to intelligent event batching.
+
+#### `action_total`
+**What it measures:** The total number of reload actions triggered (rolling restarts of Deployments/StatefulSets/DaemonSets).
+
+**What it indicates:**
+- **Should match expected value:** Both implementations should trigger the same number of reloads for the same workload.
+- **Lower than expected:** Some updates were missed - potential bug or race condition.
+- **Higher than expected:** Duplicate reloads triggered - inefficiency but not data loss.
+
+#### `reload_executed_total`
+**What it measures:** Successful reload operations executed, labeled by `success=true/false`.
+
+**What it indicates:**
+- **`success=true` count:** Number of workloads successfully restarted.
+- **`success=false` count:** Failed restart attempts (API errors, permission issues).
+- **Should match `action_total`:** If significantly lower, reloads are failing.
+
+#### `workloads_scanned_total`
+**What it measures:** Number of workloads (Deployments, etc.) scanned when checking for ConfigMap/Secret references.
+
+**What it indicates:**
+- **High count:** Controller is scanning many workloads per reconcile.
+- **Expected behavior:** Should roughly match the number of workloads × number of reconciles.
+- **Optimization signal:** If very high, namespace filtering or label selectors could help.
+
+#### `workloads_matched_total`
+**What it measures:** Number of workloads that matched (reference the changed ConfigMap/Secret).
+
+**What it indicates:**
+- **Should match `reload_executed_total`:** Every matched workload should be reloaded.
+- **Higher than reloads:** Some matched workloads weren't reloaded (potential issue).
+
+#### `errors_total`
+**What it measures:** Total errors encountered, labeled by error type.
+
+**What it indicates:**
+- **Should be 0:** Any errors indicate problems.
+- **Common causes:** API server timeouts, RBAC issues, resource conflicts.
+- **Critical metric:** Non-zero errors in production should be investigated.
+
+### API Efficiency Metrics (REST Client)
+
+These metrics track Kubernetes API server calls made by Reloader. Lower values indicate more efficient operation with less API server load.
+
+#### `rest_client_requests_total`
+**What it measures:** Total number of HTTP requests made to the Kubernetes API server.
+
+**What it indicates:**
+- **Lower is better:** Fewer API calls means less load on the API server.
+- **High count:** May indicate inefficient caching or excessive reconciles.
+- **Comparison use:** Shows overall API efficiency between implementations.
+
+#### `rest_client_requests_get`
+**What it measures:** Number of GET requests (fetching individual resources or listings).
+
+**What it indicates:**
+- **Includes:** Fetching ConfigMaps, Secrets, Deployments, etc.
+- **Higher count:** More frequent resource fetching, possibly due to cache misses.
+- **Expected behavior:** Controller-runtime's caching should reduce GET requests compared to direct API calls.
+
+#### `rest_client_requests_patch`
+**What it measures:** Number of PATCH requests (partial updates to resources).
+
+**What it indicates:**
+- **Used for:** Rolling restart annotations on workloads.
+- **Should correlate with:** `reload_executed_total` - each reload typically requires one PATCH.
+- **Lower is better:** Fewer patches means more efficient batching or deduplication.
+
+#### `rest_client_requests_put`
+**What it measures:** Number of PUT requests (full resource updates).
+
+**What it indicates:**
+- **Used for:** Full object replacements (less common than PATCH).
+- **Should be low:** Most updates use PATCH for efficiency.
+- **High count:** May indicate suboptimal update strategy.
+
+#### `rest_client_requests_errors`
+**What it measures:** Number of failed API requests (4xx/5xx responses).
+
+**What it indicates:**
+- **Should be 0:** Errors indicate API server issues or permission problems.
+- **Common causes:** Rate limiting, RBAC issues, resource conflicts, network issues.
+- **Non-zero:** Investigate API server logs and Reloader permissions.
+
+### Latency Metrics (Percentiles)
+
+All latency metrics are reported in **seconds**. The report shows p50 (median), p95, and p99 percentiles.
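+
+Percentiles are computed from the histogram buckets that Reloader exposes (for example `reloader_reconcile_duration_seconds` and `reloader_action_latency_seconds` in `internal/pkg/metrics/prometheus.go`). To spot-check a percentile yourself, you can run a `histogram_quantile` query against the test's Prometheus. The sketch below is illustrative only; the endpoint and the rate window are assumptions, not the framework's actual query code.
+
+```go
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+)
+
+func main() {
+	// p95 of reloader_reconcile_duration_seconds, computed from its
+	// histogram buckets. The endpoint and the 5m window are assumptions;
+	// adjust them to your Prometheus port-forward and test duration.
+	q := `histogram_quantile(0.95, sum(rate(reloader_reconcile_duration_seconds_bucket[5m])) by (le))`
+
+	resp, err := http.Get("http://localhost:9091/api/v1/query?query=" + url.QueryEscape(q))
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+
+	body, _ := io.ReadAll(resp.Body)
+	fmt.Println(string(body)) // raw Prometheus JSON response
+}
+```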
+
+#### `reconcile_duration (s)`
+**What it measures:** Time spent inside each reconcile loop, from start to finish.
+
+**What it indicates:**
+- **p50 (median):** Typical reconcile time. Should be < 100ms for good performance.
+- **p95:** 95th percentile - only 5% of reconciles take longer than this.
+- **p99:** 99th percentile - indicates worst-case performance.
+
+**Interpreting differences:**
+- **New higher than old:** Controller-runtime reconciles may do more work per loop but run fewer times. Check `reconcile_total` - if it's lower, this is expected.
+- **Minor differences (< 0.5s absolute):** Not significant for sub-second values.
+
+#### `action_latency (s)`
+**What it measures:** End-to-end time from ConfigMap/Secret change detection to workload restart triggered.
+
+**What it indicates:**
+- **This is the user-facing latency:** How long users wait for their config changes to take effect.
+- **p50 < 1s:** Excellent - most changes apply within a second.
+- **p95 < 5s:** Good - even under load, changes apply quickly.
+- **p99 > 10s:** May need investigation - some changes take too long.
+
+**What affects this:**
+- API server responsiveness
+- Number of workloads to scan
+- Concurrent updates competing for resources
+
+### Understanding the Report
+
+#### Report Columns
+
+```
+Metric Old New Expected Old✓ New✓ Status
+------ --- --- -------- ---- ---- ------
+action_total 100.00 100.00 100 ✓ ✓ pass
+action_latency_p95 (s) 0.15 0.04 - - - pass
+```
+
+- **Old/New:** Measured values from each implementation
+- **Expected:** Known expected value (for throughput metrics)
+- **Old✓/New✓:** Whether the value is within 15% of expected (✓ = yes, ✗ = no, - = no expected value)
+- **Status:** pass/fail based on comparison thresholds
+
+#### Pass/Fail Logic
+
+| Metric Type | Pass Condition |
+|-------------|----------------|
+| Throughput (action_total, reload_executed_total) | New value within 15% of expected |
+| Latency (p50, p95, p99) | New not more than threshold% worse than old, OR absolute difference < minimum threshold |
+| Errors | New ≤ Old (ideally both 0) |
+| API Efficiency (rest_client_requests_*) | New ≤ Old (lower is better), or New not more than 50% higher |
+
+#### Latency Thresholds
+
+Latency comparisons use both percentage AND absolute thresholds to avoid false failures:
+
+| Metric | Max % Worse | Min Absolute Diff |
+|--------|-------------|-------------------|
+| p50 | 100% | 0.5s |
+| p95 | 100% | 1.0s |
+| p99 | 100% | 1.0s |
+
+**Example:** If old p50 = 0.01s and new p50 = 0.08s:
+- Percentage difference: +700% (would fail % check)
+- Absolute difference: 0.07s (< 0.5s threshold)
+- **Result: PASS** (both values are fast enough that the difference doesn't matter)
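+
+To make these rules concrete, here is a minimal sketch of both checks (hypothetical helpers under the thresholds above; the names `latencyPass` and `withinExpected` are illustrative, not the framework's actual code):
+
+```go
+package main
+
+import "fmt"
+
+// latencyPass mirrors the latency rule: a regression passes if it is small
+// in absolute terms OR small in relative terms.
+func latencyPass(oldSec, newSec, maxPctWorse, minAbsDiff float64) bool {
+	if newSec <= oldSec {
+		return true // new is equal or faster: always a pass
+	}
+	absDiff := newSec - oldSec
+	if absDiff < minAbsDiff {
+		return true // both values are fast enough that the gap doesn't matter
+	}
+	return absDiff/oldSec*100 <= maxPctWorse
+}
+
+// withinExpected mirrors the throughput rule: the measured value must be
+// within tolPct percent of the expected value (15% in the report).
+func withinExpected(measured, expected, tolPct float64) bool {
+	if expected == 0 {
+		return measured == 0
+	}
+	diff := measured - expected
+	if diff < 0 {
+		diff = -diff
+	}
+	return diff/expected*100 <= tolPct
+}
+
+func main() {
+	// The p50 example above: +700% relative, but only 0.07s absolute -> pass.
+	fmt.Println(latencyPass(0.01, 0.08, 100, 0.5)) // true
+	// An action_total of 50 against an expected value of 50 -> pass.
+	fmt.Println(withinExpected(50, 50, 15)) // true
+}
+```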
+
+### Resource Consumption Metrics
+
+These metrics track CPU, memory, and Go runtime resource usage. Lower values generally indicate more efficient operation.
+
+#### Memory Metrics
+
+| Metric | Description | Unit |
+|--------|-------------|------|
+| `memory_rss_mb_avg` | Average RSS (resident set size) memory | MB |
+| `memory_rss_mb_max` | Peak RSS memory during test | MB |
+| `memory_heap_mb_avg` | Average Go heap allocation | MB |
+| `memory_heap_mb_max` | Peak Go heap allocation | MB |
+
+**What to watch for:**
+- **High RSS:** May indicate memory leaks or inefficient caching
+- **High heap:** Many objects being created (check GC metrics)
+- **Growing over time:** Potential memory leak
+
+#### CPU Metrics
+
+| Metric | Description | Unit |
+|--------|-------------|------|
+| `cpu_cores_avg` | Average CPU usage rate | cores |
+| `cpu_cores_max` | Peak CPU usage rate | cores |
+
+**What to watch for:**
+- **High CPU:** Inefficient algorithms or excessive reconciles
+- **Spiky max:** May indicate burst handling issues
+
+#### Go Runtime Metrics
+
+| Metric | Description | Unit |
+|--------|-------------|------|
+| `goroutines_avg` | Average goroutine count | count |
+| `goroutines_max` | Peak goroutine count | count |
+| `gc_pause_p99_ms` | 99th percentile GC pause time | ms |
+
+**What to watch for:**
+- **High goroutines:** Potential goroutine leak or unbounded concurrency
+- **High GC pause:** Large heap or allocation pressure
+
+### Scenario-Specific Expectations
+
+| Scenario | Key Metrics to Watch | Expected Behavior |
+|----------|---------------------|-------------------|
+| S1 (Burst) | action_latency_p99, cpu_cores_max, goroutines_max | Should handle bursts without queue backup |
+| S2 (Fan-Out) | reconcile_total, workloads_matched, memory_rss_mb_max | One CM change → 50 workload reloads |
+| S3 (High Cardinality) | reconcile_duration, memory_heap_mb_avg | Many namespaces shouldn't increase memory |
+| S4 (No-Op) | action_total = 0, cpu_cores_avg should be low | Minimal resource usage for no-op |
+| S5 (Churn) | errors_total, goroutines_avg | Graceful handling, no goroutine leak |
+| S6 (Restart) | All metrics captured | Metrics survive controller restart |
+| S7 (API Pressure) | errors_total, cpu_cores_max, goroutines_max | No errors under concurrent load |
+| S8 (Large Objects) | memory_rss_mb_max, gc_pause_p99_ms | Large ConfigMaps don't cause OOM or GC issues |
+| S9 (Multi-Workload) | reload_executed_total per type | All workload types (Deploy, STS, DS) reload |
+| S10 (Secrets) | reload_executed_total, workloads_matched | Both Secrets and ConfigMaps trigger reloads |
+| S11 (Annotation) | workload annotations present | Deployments get `last-reloaded-from` annotation |
+| S12 (Pause) | reload_executed_total << updates | Pause-period reduces reload frequency |
+| S13 (Complex) | reload_executed_total | All reference types trigger reloads |
+
+### Troubleshooting
+
+#### New implementation shows 0 for all metrics
+- Check if Prometheus is scraping the new Reloader pod
+- Verify pod annotations: `prometheus.io/scrape: "true"`
+- Check Prometheus targets: `http://localhost:9091/targets`
+
+#### Metrics don't match expected values
+- Verify test ran to completion (check logs)
+- Ensure Prometheus scraped final metrics (18s wait after test)
+- Check for pod restarts during test (metrics reset on restart - handled by `increase()`)
+
+#### High latency in new implementation
+- Check Reloader pod resource limits
+- Look for API server throttling in logs
+- Compare `reconcile_total` - fewer reconciles with higher duration may be normal
+
+#### REST client errors are non-zero
+- **Common causes:**
+ - Optional CRD schemes registered but CRDs not installed (e.g., Argo Rollouts, OpenShift DeploymentConfig)
+ - API server rate limiting under high load
+ - RBAC permissions missing for certain resource types
+- **Argo Rollouts errors:** If you see ~4 errors per test, ensure `--enable-argo-rollouts=false` if not using Argo Rollouts
+- **OpenShift errors:** Similarly, ensure DeploymentConfig support is disabled on non-OpenShift clusters
+
+#### REST client requests much higher in new implementation
+- Check if caching is working correctly
+- Look for excessive re-queuing in controller logs
+- Compare `reconcile_total` - more reconciles naturally means more API calls
+
+## Report Format
+
+The report generator produces a comparison table with units and expected value indicators:
+
+```
+================================================================================
+ RELOADER A/B COMPARISON REPORT
+================================================================================
+
+Scenario: S2
+Generated: 2026-01-03 14:30:00
+Status: PASS
+Summary: All metrics within acceptable thresholds
+
+Test: S2: Fan-out test - 1 CM update triggers 50 deployment reloads
+
+--------------------------------------------------------------------------------
+ METRIC COMPARISONS
+--------------------------------------------------------------------------------
+(Old✓/New✓ = meets expected value within 15%)
+
+Metric Old New Expected Old✓ New✓ Status
+------ --- --- -------- ---- ---- ------
+reconcile_total 50.00 25.00 - - - pass
+reconcile_duration_p50 (s) 0.01 0.05 - - - pass
+reconcile_duration_p95 (s) 0.02 0.15 - - - pass
+action_total 50.00 50.00 50 ✓ ✓ pass
+action_latency_p50 (s) 0.05 0.03 - - - pass
+action_latency_p95 (s) 0.12 0.08 - - - pass
+errors_total 0.00 0.00 - - - pass
+reload_executed_total 50.00 50.00 50 ✓ ✓ pass
+workloads_scanned_total 50.00 50.00 50 ✓ ✓ pass
+workloads_matched_total 50.00 50.00 50 ✓ ✓ pass
+rest_client_requests_total 850 720 - - - pass
+rest_client_requests_get 500 420 - - - pass
+rest_client_requests_patch 300 250 - - - pass
+rest_client_requests_errors 0 0 - - - pass
+```
+
+Reports are saved to `results/<scenario>/report.txt` after each test.
+
+## Directory Structure
+
+```
+test/loadtest/
+├── cmd/
+│   └── loadtest/                # Unified CLI (run + report)
+│       └── main.go
+├── internal/
+│   ├── cluster/                 # Kind cluster management
+│   │   └── kind.go
+│   ├── prometheus/              # Prometheus deployment & querying
+│   │   └── prometheus.go
+│   ├── reloader/                # Reloader deployment
+│   │   └── deploy.go
+│   └── scenarios/               # Test scenario implementations
+│       └── scenarios.go
+├── manifests/
+│   └── prometheus.yaml          # Prometheus deployment manifest
+├── results/                     # Generated after tests
+│   └── <scenario>/
+│       ├── old/                 # Old version data
+│       │   ├── *.json           # Prometheus metric snapshots
+│       │   └── reloader.log     # Reloader pod logs
+│       ├── new/                 # New version data
+│       │   ├── *.json           # Prometheus metric snapshots
+│       │   └── reloader.log     # Reloader pod logs
+│       ├── expected.json        # Expected values from test
+│       └── report.txt           # Comparison report
+├── go.mod
+├── go.sum
+└── README.md
+```
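+
+The raw snapshots under `old/` and `new/` are plain Prometheus API responses, so they can be inspected directly (paths below assume scenario S2 was run and `jq` is installed):
+
+```bash
+cat results/S2/report.txt                                     # human-readable comparison
+jq '.data.result' results/S2/new/reloader_action_total.json   # raw metric snapshot
+```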
+
+## Building Local Images for Testing
+
+If you want to test local code changes:
+
+```bash
+# Build the new Reloader image from current source
+docker build -t localhost/reloader:dev -f Dockerfile .
+
+# Build from a different branch/commit
+git checkout feature-branch
+docker build -t localhost/reloader:feature -f Dockerfile .
+
+# Then run comparison
+./loadtest run \
+ --old-image=stakater/reloader:v1.0.0 \
+ --new-image=localhost/reloader:feature
+```
+
+## Interpreting Results
+
+### PASS
+All metrics are within acceptable thresholds; the new implementation performs comparably to, or better than, the old one.
+
+### FAIL
+One or more metrics exceeded thresholds. Review the specific metrics:
+- **Latency degradation**: p95/p99 latencies are significantly higher
+- **Missed reloads**: `reload_executed_total` differs significantly
+- **Errors increased**: `errors_total` is higher in new version
+
+### Investigation
+
+If tests fail, check:
+1. Pod logs: `kubectl logs -n reloader-new deployment/reloader` (or check `results/<scenario>/new/reloader.log`)
+2. Resource usage: `kubectl top pods -n reloader-new`
+3. Events: `kubectl get events -n reloader-test`
+
+## Parallel Execution
+
+The `--parallelism` option enables running scenarios on multiple kind clusters simultaneously, significantly reducing total test time.
+
+### How It Works
+
+1. **Multiple Clusters**: Creates N kind clusters named `reloader-loadtest-0`, `reloader-loadtest-1`, etc.
+2. **Separate Prometheus**: Each cluster gets its own Prometheus instance with a unique port (9091, 9092, etc.)
+3. **Worker Pool**: Scenarios are distributed to workers via a channel, with each worker running on its own cluster
+4. **Independent Execution**: Each scenario runs in complete isolation with no resource contention
+
+### Usage
+
+```bash
+# Run 4 scenarios at a time (creates 4 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=4
+
+# Run all 13 scenarios in parallel (creates 13 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=13 --scenario=all
+```
+
+### Resource Requirements
+
+Parallel execution requires significant system resources:
+
+| Parallelism | Clusters | Est. Memory | Est. CPU |
+|-------------|----------|-------------|----------|
+| 1 (default) | 1 | ~4GB | 2-4 cores |
+| 4 | 4 | ~16GB | 8-16 cores |
+| 13 | 13 | ~52GB | 26-52 cores |
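+
+As a quick sanity check before picking a parallelism level, compare the table above with what the host actually has:
+
+```bash
+nproc    # available CPU cores
+free -h  # total and available memory
+```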
+
+### Notes
+
+- The `--skip-cluster` option is not supported with parallelism > 1
+- Each worker loads images independently, so initial setup takes longer
+- All results are written to the same `--results-dir` with per-scenario subdirectories
+- If a cluster setup fails, remaining workers continue with available clusters
+- Parallelism automatically reduces to match scenario count if set higher
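+
+If a parallel run is interrupted, the worker clusters may be left behind. They can be removed manually; a sketch assuming the default cluster names and `--parallelism=4`:
+
+```bash
+for i in 0 1 2 3; do
+  kind delete cluster --name "reloader-loadtest-$i"
+done
+```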
+
+## CI Integration
+
+### GitHub Actions
+
+Load tests can be triggered on pull requests by commenting `/loadtest`:
+
+```
+/loadtest
+```
+
+This will:
+1. Build a container image from the PR branch
+2. Run all load test scenarios against it
+3. Post results as a PR comment
+4. Upload detailed results as artifacts
+
+### Make Target
+
+Run load tests locally or in CI:
+
+```bash
+# From repository root
+make loadtest
+```
+
+This builds the container image and runs all scenarios with a 60-second duration.
diff --git a/test/loadtest/cmd/loadtest/main.go b/test/loadtest/cmd/loadtest/main.go
new file mode 100644
index 0000000..19f7b1d
--- /dev/null
+++ b/test/loadtest/cmd/loadtest/main.go
@@ -0,0 +1,1582 @@
+// Package main implements the unified load test CLI for Reloader A/B comparison.
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "log"
+ "math"
+ "os"
+ "os/exec"
+ "os/signal"
+ "path/filepath"
+ "sort"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/stakater/Reloader/test/loadtest/internal/cluster"
+ "github.com/stakater/Reloader/test/loadtest/internal/prometheus"
+ "github.com/stakater/Reloader/test/loadtest/internal/reloader"
+ "github.com/stakater/Reloader/test/loadtest/internal/scenarios"
+ "k8s.io/client-go/kubernetes"
+ "k8s.io/client-go/tools/clientcmd"
+)
+
+const (
+ clusterName = "reloader-loadtest"
+ testNamespace = "reloader-test"
+)
+
+// workerContext holds all resources for a single worker (cluster + prometheus).
+type workerContext struct {
+ id int
+ clusterMgr *cluster.Manager
+ promMgr *prometheus.Manager
+ kubeClient kubernetes.Interface
+ kubeContext string
+ runtime string
+}
+
+// Config holds CLI configuration.
+type Config struct {
+ OldImage string
+ NewImage string
+ Scenario string
+ Duration int
+ SkipCluster bool
+ ResultsDir string
+ ManifestsDir string
+ Parallelism int // Number of parallel clusters (1 = sequential)
+}
+
+func main() {
+ if len(os.Args) < 2 {
+ printUsage()
+ os.Exit(1)
+ }
+
+ cmd := os.Args[1]
+ switch cmd {
+ case "run":
+ runCommand(os.Args[2:])
+ case "report":
+ reportCommand(os.Args[2:])
+ case "help", "--help", "-h":
+ printUsage()
+ default:
+ fmt.Printf("Unknown command: %s\n", cmd)
+ printUsage()
+ os.Exit(1)
+ }
+}
+
+func printUsage() {
+ fmt.Print(`Reloader Load Test CLI
+
+Usage:
+ loadtest run [options] Run A/B comparison tests
+ loadtest report [options] Generate comparison report
+ loadtest help Show this help
+
+Run Options:
+ --old-image=IMAGE Container image for "old" version (required for comparison)
+ --new-image=IMAGE Container image for "new" version (required for comparison)
+ --scenario=ID Test scenario: S1-S13 or "all" (default: all)
+ --duration=SECONDS Test duration in seconds (default: 60)
+ --parallelism=N Run N scenarios in parallel on N clusters (default: 1)
+ --skip-cluster Skip kind cluster creation (use existing, only for parallelism=1)
+ --results-dir=DIR Directory for results (default: ./results)
+
+Report Options:
+ --scenario=ID Scenario to report on (required)
+ --results-dir=DIR Directory containing results (default: ./results)
+ --output=FILE Output file (default: stdout)
+
+Examples:
+ # Compare two images
+ loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=stakater/reloader:v1.1.0
+
+ # Run specific scenario
+ loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=localhost/reloader:dev --scenario=S2
+
+ # Test single image (no comparison)
+ loadtest run --new-image=localhost/reloader:test
+
+ # Run all scenarios in parallel on 4 clusters
+ loadtest run --new-image=localhost/reloader:test --parallelism=4
+
+ # Run all 13 scenarios in parallel (one cluster per scenario)
+ loadtest run --new-image=localhost/reloader:test --parallelism=13
+
+ # Generate report
+ loadtest report --scenario=S2 --results-dir=./results
+`)
+}
+
+func parseArgs(args []string) Config {
+ cfg := Config{
+ Scenario: "all",
+ Duration: 60,
+ ResultsDir: "./results",
+ Parallelism: 1,
+ }
+
+ // Find manifests dir relative to executable or current dir
+ execPath, _ := os.Executable()
+ execDir := filepath.Dir(execPath)
+ cfg.ManifestsDir = filepath.Join(execDir, "..", "..", "manifests")
+ if _, err := os.Stat(cfg.ManifestsDir); os.IsNotExist(err) {
+ // Try relative to current dir
+ cfg.ManifestsDir = "./manifests"
+ }
+
+ for _, arg := range args {
+ switch {
+ case strings.HasPrefix(arg, "--old-image="):
+ cfg.OldImage = strings.TrimPrefix(arg, "--old-image=")
+ case strings.HasPrefix(arg, "--new-image="):
+ cfg.NewImage = strings.TrimPrefix(arg, "--new-image=")
+ case strings.HasPrefix(arg, "--scenario="):
+ cfg.Scenario = strings.TrimPrefix(arg, "--scenario=")
+ case strings.HasPrefix(arg, "--duration="):
+ fmt.Sscanf(strings.TrimPrefix(arg, "--duration="), "%d", &cfg.Duration)
+ case strings.HasPrefix(arg, "--parallelism="):
+ fmt.Sscanf(strings.TrimPrefix(arg, "--parallelism="), "%d", &cfg.Parallelism)
+ case arg == "--skip-cluster":
+ cfg.SkipCluster = true
+ case strings.HasPrefix(arg, "--results-dir="):
+ cfg.ResultsDir = strings.TrimPrefix(arg, "--results-dir=")
+ case strings.HasPrefix(arg, "--manifests-dir="):
+ cfg.ManifestsDir = strings.TrimPrefix(arg, "--manifests-dir=")
+ }
+ }
+
+ // Validate parallelism
+ if cfg.Parallelism < 1 {
+ cfg.Parallelism = 1
+ }
+
+ return cfg
+}
+
+func runCommand(args []string) {
+ cfg := parseArgs(args)
+
+ // Validate required args
+ if cfg.OldImage == "" && cfg.NewImage == "" {
+ log.Fatal("At least one of --old-image or --new-image is required")
+ }
+
+ // Determine mode
+ runOld := cfg.OldImage != ""
+ runNew := cfg.NewImage != ""
+ runBoth := runOld && runNew
+
+ log.Printf("Configuration:")
+ log.Printf(" Scenario: %s", cfg.Scenario)
+ log.Printf(" Duration: %ds", cfg.Duration)
+ log.Printf(" Parallelism: %d", cfg.Parallelism)
+ if cfg.OldImage != "" {
+ log.Printf(" Old image: %s", cfg.OldImage)
+ }
+ if cfg.NewImage != "" {
+ log.Printf(" New image: %s", cfg.NewImage)
+ }
+
+ // Detect container runtime
+ runtime, err := cluster.DetectContainerRuntime()
+ if err != nil {
+ log.Fatalf("Failed to detect container runtime: %v", err)
+ }
+ log.Printf(" Container runtime: %s", runtime)
+
+ // Setup context with signal handling
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ sigCh := make(chan os.Signal, 1)
+ signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+ go func() {
+ <-sigCh
+ log.Println("Received shutdown signal...")
+ cancel()
+ }()
+
+ // Determine scenarios to run
+ scenariosToRun := []string{cfg.Scenario}
+ if cfg.Scenario == "all" {
+ scenariosToRun = []string{"S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S12", "S13"}
+ }
+
+ // Skip-cluster only works for parallelism=1
+ if cfg.SkipCluster && cfg.Parallelism > 1 {
+ log.Fatal("--skip-cluster is not supported with --parallelism > 1")
+ }
+
+ // If parallelism > 1, use parallel execution
+ if cfg.Parallelism > 1 {
+ runParallel(ctx, cfg, scenariosToRun, runtime, runOld, runNew, runBoth)
+ return
+ }
+
+ // Sequential execution (parallelism == 1)
+ runSequential(ctx, cfg, scenariosToRun, runtime, runOld, runNew, runBoth)
+}
+
+// runSequential runs scenarios one by one on a single cluster.
+func runSequential(ctx context.Context, cfg Config, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) {
+ // Create cluster manager
+ clusterMgr := cluster.NewManager(cluster.Config{
+ Name: clusterName,
+ ContainerRuntime: runtime,
+ })
+
+ // Create/verify cluster
+ if cfg.SkipCluster {
+ log.Println("Skipping cluster creation (using existing)")
+ if !clusterMgr.Exists() {
+ log.Fatalf("Cluster %s does not exist. Remove --skip-cluster to create it.", clusterName)
+ }
+ } else {
+ log.Println("Creating kind cluster...")
+ if err := clusterMgr.Create(ctx); err != nil {
+ log.Fatalf("Failed to create cluster: %v", err)
+ }
+ }
+
+ // Deploy Prometheus
+ promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml")
+ promMgr := prometheus.NewManager(promManifest)
+
+ log.Println("Installing Prometheus...")
+ if err := promMgr.Deploy(ctx); err != nil {
+ log.Fatalf("Failed to deploy Prometheus: %v", err)
+ }
+
+ if err := promMgr.StartPortForward(ctx); err != nil {
+ log.Fatalf("Failed to start Prometheus port-forward: %v", err)
+ }
+ defer promMgr.StopPortForward()
+
+ // Load images into kind
+ log.Println("Loading images into kind cluster...")
+ if runOld {
+ log.Printf("Loading old image: %s", cfg.OldImage)
+ if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil {
+ log.Fatalf("Failed to load old image: %v", err)
+ }
+ }
+ if runNew {
+ log.Printf("Loading new image: %s", cfg.NewImage)
+ if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil {
+ log.Fatalf("Failed to load new image: %v", err)
+ }
+ }
+
+ // Pre-pull test images
+ log.Println("Pre-loading test images...")
+ testImage := "gcr.io/google-containers/busybox:1.27"
+ clusterMgr.LoadImage(ctx, testImage) // Ignore errors
+
+ // Get kubernetes client
+ kubeClient, err := getKubeClient("")
+ if err != nil {
+ log.Fatalf("Failed to create kubernetes client: %v", err)
+ }
+
+ for _, scenarioID := range scenariosToRun {
+ log.Printf("========================================")
+ log.Printf("=== Starting scenario %s ===", scenarioID)
+ log.Printf("========================================")
+
+ // Clean up from previous scenario
+ cleanupTestNamespaces(ctx, "")
+ cleanupReloader(ctx, "old", "")
+ cleanupReloader(ctx, "new", "")
+
+ // Reset Prometheus
+ if err := promMgr.Reset(ctx); err != nil {
+ log.Printf("Warning: failed to reset Prometheus: %v", err)
+ }
+
+ // Create test namespace
+ createTestNamespace(ctx, "")
+
+ if runOld {
+ // Test old version
+ oldMgr := reloader.NewManager(reloader.Config{
+ Version: "old",
+ Image: cfg.OldImage,
+ })
+
+ if err := oldMgr.Deploy(ctx); err != nil {
+ log.Printf("Failed to deploy old Reloader: %v", err)
+ continue
+ }
+
+ // Wait for Prometheus to discover and scrape the Reloader
+ if err := promMgr.WaitForTarget(ctx, oldMgr.Job(), 60*time.Second); err != nil {
+ log.Printf("Warning: %v", err)
+ log.Println("Proceeding anyway, but metrics may be incomplete")
+ }
+
+ runScenario(ctx, kubeClient, scenarioID, "old", cfg.OldImage, cfg.Duration, cfg.ResultsDir)
+ collectMetrics(ctx, promMgr, oldMgr.Job(), scenarioID, "old", cfg.ResultsDir)
+ collectLogs(ctx, oldMgr, scenarioID, "old", cfg.ResultsDir)
+
+ if runBoth {
+ // Clean up for new version
+ cleanupTestNamespaces(ctx, "")
+ oldMgr.Cleanup(ctx)
+ promMgr.Reset(ctx)
+ createTestNamespace(ctx, "")
+ }
+ }
+
+ if runNew {
+ // Test new version
+ newMgr := reloader.NewManager(reloader.Config{
+ Version: "new",
+ Image: cfg.NewImage,
+ })
+
+ if err := newMgr.Deploy(ctx); err != nil {
+ log.Printf("Failed to deploy new Reloader: %v", err)
+ continue
+ }
+
+ // Wait for Prometheus to discover and scrape the Reloader
+ if err := promMgr.WaitForTarget(ctx, newMgr.Job(), 60*time.Second); err != nil {
+ log.Printf("Warning: %v", err)
+ log.Println("Proceeding anyway, but metrics may be incomplete")
+ }
+
+ runScenario(ctx, kubeClient, scenarioID, "new", cfg.NewImage, cfg.Duration, cfg.ResultsDir)
+ collectMetrics(ctx, promMgr, newMgr.Job(), scenarioID, "new", cfg.ResultsDir)
+ collectLogs(ctx, newMgr, scenarioID, "new", cfg.ResultsDir)
+ }
+
+ // Generate report
+ generateReport(scenarioID, cfg.ResultsDir, runBoth)
+
+ log.Printf("=== Scenario %s complete ===", scenarioID)
+ }
+
+ log.Println("Load test complete!")
+ log.Printf("Results available in: %s", cfg.ResultsDir)
+}
+
+// runParallel runs scenarios in parallel on N separate kind clusters.
+func runParallel(ctx context.Context, cfg Config, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) {
+ numWorkers := cfg.Parallelism
+ if numWorkers > len(scenariosToRun) {
+ numWorkers = len(scenariosToRun)
+ log.Printf("Reducing parallelism to %d (number of scenarios)", numWorkers)
+ }
+
+ log.Printf("Starting parallel execution with %d workers", numWorkers)
+
+ // Create workers
+ workers := make([]*workerContext, numWorkers)
+ var setupWg sync.WaitGroup
+ setupErrors := make(chan error, numWorkers)
+
+ log.Println("Setting up worker clusters...")
+ for i := 0; i < numWorkers; i++ {
+ setupWg.Add(1)
+ go func(workerID int) {
+ defer setupWg.Done()
+ worker, err := setupWorker(ctx, cfg, workerID, runtime, runOld, runNew)
+ if err != nil {
+ setupErrors <- fmt.Errorf("worker %d setup failed: %w", workerID, err)
+ return
+ }
+ workers[workerID] = worker
+ }(i)
+ }
+
+ setupWg.Wait()
+ close(setupErrors)
+
+ // Check for setup errors
+ for err := range setupErrors {
+ log.Printf("Error: %v", err)
+ }
+
+ // Verify all workers are ready
+ readyWorkers := 0
+ for _, w := range workers {
+ if w != nil {
+ readyWorkers++
+ }
+ }
+ if readyWorkers == 0 {
+ log.Fatal("No workers ready, aborting")
+ }
+ if readyWorkers < numWorkers {
+ log.Printf("Warning: only %d/%d workers ready", readyWorkers, numWorkers)
+ }
+
+ // Cleanup workers on exit
+ defer func() {
+ log.Println("Cleaning up worker clusters...")
+ for _, w := range workers {
+ if w != nil {
+ w.promMgr.StopPortForward()
+ }
+ }
+ }()
+
+ // Create scenario channel
+ scenarioCh := make(chan string, len(scenariosToRun))
+ for _, s := range scenariosToRun {
+ scenarioCh <- s
+ }
+ close(scenarioCh)
+
+ // Results tracking
+ var resultsMu sync.Mutex
+ completedScenarios := make([]string, 0, len(scenariosToRun))
+
+ // Start workers
+ var wg sync.WaitGroup
+ for _, worker := range workers {
+ if worker == nil {
+ continue
+ }
+ wg.Add(1)
+ go func(w *workerContext) {
+ defer wg.Done()
+ for scenarioID := range scenarioCh {
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ log.Printf("[Worker %d] Starting scenario %s", w.id, scenarioID)
+
+ // Clean up from previous scenario
+ cleanupTestNamespaces(ctx, w.kubeContext)
+ cleanupReloader(ctx, "old", w.kubeContext)
+ cleanupReloader(ctx, "new", w.kubeContext)
+
+ // Reset Prometheus
+ if err := w.promMgr.Reset(ctx); err != nil {
+ log.Printf("[Worker %d] Warning: failed to reset Prometheus: %v", w.id, err)
+ }
+
+ // Create test namespace
+ createTestNamespace(ctx, w.kubeContext)
+
+ if runOld {
+ runVersionOnWorker(ctx, w, cfg, scenarioID, "old", cfg.OldImage, runBoth)
+ }
+
+ if runNew {
+ runVersionOnWorker(ctx, w, cfg, scenarioID, "new", cfg.NewImage, false)
+ }
+
+ // Generate report
+ generateReport(scenarioID, cfg.ResultsDir, runBoth)
+
+ resultsMu.Lock()
+ completedScenarios = append(completedScenarios, scenarioID)
+ resultsMu.Unlock()
+
+ log.Printf("[Worker %d] Scenario %s complete", w.id, scenarioID)
+ }
+ }(worker)
+ }
+
+ wg.Wait()
+
+ log.Println("Load test complete!")
+ log.Printf("Completed %d/%d scenarios", len(completedScenarios), len(scenariosToRun))
+ log.Printf("Results available in: %s", cfg.ResultsDir)
+}
+
+// setupWorker creates a cluster and deploys prometheus for a single worker.
+func setupWorker(ctx context.Context, cfg Config, workerID int, runtime string, runOld, runNew bool) (*workerContext, error) {
+ workerName := fmt.Sprintf("%s-%d", clusterName, workerID)
+ promPort := 9091 + workerID
+
+ log.Printf("[Worker %d] Creating cluster %s (ports %d/%d)...", workerID, workerName, 8080+workerID, 8443+workerID)
+
+ clusterMgr := cluster.NewManager(cluster.Config{
+ Name: workerName,
+ ContainerRuntime: runtime,
+ PortOffset: workerID, // Each cluster gets unique ports
+ })
+
+ if err := clusterMgr.Create(ctx); err != nil {
+ return nil, fmt.Errorf("creating cluster: %w", err)
+ }
+
+ kubeContext := clusterMgr.Context()
+
+ // Deploy Prometheus
+ promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml")
+ promMgr := prometheus.NewManagerWithPort(promManifest, promPort, kubeContext)
+
+ log.Printf("[Worker %d] Installing Prometheus (port %d)...", workerID, promPort)
+ if err := promMgr.Deploy(ctx); err != nil {
+ return nil, fmt.Errorf("deploying prometheus: %w", err)
+ }
+
+ if err := promMgr.StartPortForward(ctx); err != nil {
+ return nil, fmt.Errorf("starting prometheus port-forward: %w", err)
+ }
+
+ // Load images
+ log.Printf("[Worker %d] Loading images...", workerID)
+ if runOld {
+ if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil {
+ log.Printf("[Worker %d] Warning: failed to load old image: %v", workerID, err)
+ }
+ }
+ if runNew {
+ if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil {
+ log.Printf("[Worker %d] Warning: failed to load new image: %v", workerID, err)
+ }
+ }
+
+ // Pre-pull test images
+ testImage := "gcr.io/google-containers/busybox:1.27"
+ clusterMgr.LoadImage(ctx, testImage) // Ignore errors
+
+ // Get kubernetes client for this context
+ kubeClient, err := getKubeClient(kubeContext)
+ if err != nil {
+ return nil, fmt.Errorf("creating kubernetes client: %w", err)
+ }
+
+ log.Printf("[Worker %d] Ready", workerID)
+ return &workerContext{
+ id: workerID,
+ clusterMgr: clusterMgr,
+ promMgr: promMgr,
+ kubeClient: kubeClient,
+ kubeContext: kubeContext,
+ runtime: runtime,
+ }, nil
+}
+
+// runVersionOnWorker runs a single version test on a worker.
+func runVersionOnWorker(ctx context.Context, w *workerContext, cfg Config, scenarioID, version, image string, cleanupAfter bool) {
+ mgr := reloader.NewManager(reloader.Config{
+ Version: version,
+ Image: image,
+ })
+ mgr.SetKubeContext(w.kubeContext)
+
+ if err := mgr.Deploy(ctx); err != nil {
+ log.Printf("[Worker %d] Failed to deploy %s Reloader: %v", w.id, version, err)
+ return
+ }
+
+ // Wait for Prometheus to discover and scrape the Reloader
+ if err := w.promMgr.WaitForTarget(ctx, mgr.Job(), 60*time.Second); err != nil {
+ log.Printf("[Worker %d] Warning: %v", w.id, err)
+ log.Printf("[Worker %d] Proceeding anyway, but metrics may be incomplete", w.id)
+ }
+
+ runScenario(ctx, w.kubeClient, scenarioID, version, image, cfg.Duration, cfg.ResultsDir)
+ collectMetrics(ctx, w.promMgr, mgr.Job(), scenarioID, version, cfg.ResultsDir)
+ collectLogs(ctx, mgr, scenarioID, version, cfg.ResultsDir)
+
+ if cleanupAfter {
+ cleanupTestNamespaces(ctx, w.kubeContext)
+ mgr.Cleanup(ctx)
+ w.promMgr.Reset(ctx)
+ createTestNamespace(ctx, w.kubeContext)
+ }
+}
+
+func runScenario(ctx context.Context, client kubernetes.Interface, scenarioID, version, image string, duration int, resultsDir string) {
+ runner, ok := scenarios.Registry[scenarioID]
+ if !ok {
+ log.Printf("Unknown scenario: %s", scenarioID)
+ return
+ }
+
+ // For S6, set the reloader version
+ if s6, ok := runner.(*scenarios.ControllerRestartScenario); ok {
+ s6.ReloaderVersion = version
+ }
+
+ // For S11, set the image to deploy its own Reloader
+ if s11, ok := runner.(*scenarios.AnnotationStrategyScenario); ok {
+ s11.Image = image
+ }
+
+ log.Printf("Running scenario %s (%s): %s", scenarioID, version, runner.Description())
+
+ // Debug: check parent context state
+ if ctx.Err() != nil {
+ log.Printf("WARNING: Parent context already done: %v", ctx.Err())
+ }
+
+ // Add extra time for scenario setup (creating deployments, waiting for ready state)
+ // Some scenarios like S2 create 50 deployments which can take 2-3 minutes
+ timeout := time.Duration(duration)*time.Second + 5*time.Minute
+ log.Printf("Creating scenario context with timeout: %v (duration=%ds)", timeout, duration)
+
+ scenarioCtx, cancel := context.WithTimeout(ctx, timeout)
+ defer cancel()
+
+ expected, err := runner.Run(scenarioCtx, client, testNamespace, time.Duration(duration)*time.Second)
+ if err != nil {
+ log.Printf("Scenario %s failed: %v", scenarioID, err)
+ }
+
+ scenarios.WriteExpectedMetrics(scenarioID, resultsDir, expected)
+}
+
+func collectMetrics(ctx context.Context, promMgr *prometheus.Manager, job, scenarioID, version, resultsDir string) {
+ log.Printf("Waiting 5s for Reloader to finish processing events...")
+ time.Sleep(5 * time.Second)
+
+ log.Printf("Waiting 8s for Prometheus to scrape final metrics...")
+ time.Sleep(8 * time.Second)
+
+ log.Printf("Collecting metrics for %s...", version)
+ outputDir := filepath.Join(resultsDir, scenarioID, version)
+ if err := promMgr.CollectMetrics(ctx, job, outputDir, scenarioID); err != nil {
+ log.Printf("Failed to collect metrics: %v", err)
+ }
+}
+
+func collectLogs(ctx context.Context, mgr *reloader.Manager, scenarioID, version, resultsDir string) {
+ log.Printf("Collecting logs for %s...", version)
+ logPath := filepath.Join(resultsDir, scenarioID, version, "reloader.log")
+ if err := mgr.CollectLogs(ctx, logPath); err != nil {
+ log.Printf("Failed to collect logs: %v", err)
+ }
+}
+
+func generateReport(scenarioID, resultsDir string, isComparison bool) {
+ if isComparison {
+ log.Println("Generating comparison report...")
+ } else {
+ log.Println("Generating single-version report...")
+ }
+
+ reportPath := filepath.Join(resultsDir, scenarioID, "report.txt")
+
+ // Use the report command
+ cmd := exec.Command(os.Args[0], "report",
+ fmt.Sprintf("--scenario=%s", scenarioID),
+ fmt.Sprintf("--results-dir=%s", resultsDir),
+ fmt.Sprintf("--output=%s", reportPath))
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ cmd.Run()
+
+ // Also print to stdout
+ if data, err := os.ReadFile(reportPath); err == nil {
+ fmt.Println(string(data))
+ }
+
+ log.Printf("Report saved to: %s", reportPath)
+}
+
+func getKubeClient(kubeContext string) (kubernetes.Interface, error) {
+ kubeconfig := os.Getenv("KUBECONFIG")
+ if kubeconfig == "" {
+ home, _ := os.UserHomeDir()
+ kubeconfig = filepath.Join(home, ".kube", "config")
+ }
+
+ loadingRules := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig}
+ configOverrides := &clientcmd.ConfigOverrides{}
+ if kubeContext != "" {
+ configOverrides.CurrentContext = kubeContext
+ }
+
+ kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides)
+ config, err := kubeConfig.ClientConfig()
+ if err != nil {
+ return nil, err
+ }
+
+ return kubernetes.NewForConfig(config)
+}
+
+func createTestNamespace(ctx context.Context, kubeContext string) {
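+ // Render the namespace with a client-side dry-run and pipe it through `kubectl apply`
+ // so the call is idempotent (re-creating an existing namespace is not an error).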
+ args := []string{"create", "namespace", testNamespace, "--dry-run=client", "-o", "yaml"}
+ if kubeContext != "" {
+ args = append([]string{"--context", kubeContext}, args...)
+ }
+ cmd := exec.CommandContext(ctx, "kubectl", args...)
+ out, _ := cmd.Output()
+
+ applyArgs := []string{"apply", "-f", "-"}
+ if kubeContext != "" {
+ applyArgs = append([]string{"--context", kubeContext}, applyArgs...)
+ }
+ applyCmd := exec.CommandContext(ctx, "kubectl", applyArgs...)
+ applyCmd.Stdin = strings.NewReader(string(out))
+ applyCmd.Run()
+}
+
+func cleanupTestNamespaces(ctx context.Context, kubeContext string) {
+ log.Println("Cleaning up test resources...")
+
+ // Main namespace + S3 extra namespaces
+ namespaces := []string{testNamespace}
+ for i := 0; i < 10; i++ {
+ namespaces = append(namespaces, fmt.Sprintf("%s-%d", testNamespace, i))
+ }
+
+ for _, ns := range namespaces {
+ args := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"}
+ if kubeContext != "" {
+ args = append([]string{"--context", kubeContext}, args...)
+ }
+ exec.CommandContext(ctx, "kubectl", args...).Run()
+ }
+
+ // Wait a bit for cleanup
+ time.Sleep(2 * time.Second)
+
+ // Force delete remaining pods
+ for _, ns := range namespaces {
+ args := []string{"delete", "pods", "--all", "-n", ns, "--grace-period=0", "--force"}
+ if kubeContext != "" {
+ args = append([]string{"--context", kubeContext}, args...)
+ }
+ exec.CommandContext(ctx, "kubectl", args...).Run()
+ }
+}
+
+func cleanupReloader(ctx context.Context, version string, kubeContext string) {
+ ns := fmt.Sprintf("reloader-%s", version)
+
+ nsArgs := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"}
+ crArgs := []string{"delete", "clusterrole", fmt.Sprintf("reloader-%s", version), "--ignore-not-found"}
+ crbArgs := []string{"delete", "clusterrolebinding", fmt.Sprintf("reloader-%s", version), "--ignore-not-found"}
+
+ if kubeContext != "" {
+ nsArgs = append([]string{"--context", kubeContext}, nsArgs...)
+ crArgs = append([]string{"--context", kubeContext}, crArgs...)
+ crbArgs = append([]string{"--context", kubeContext}, crbArgs...)
+ }
+
+ exec.CommandContext(ctx, "kubectl", nsArgs...).Run()
+ exec.CommandContext(ctx, "kubectl", crArgs...).Run()
+ exec.CommandContext(ctx, "kubectl", crbArgs...).Run()
+}
+
+// ============================================================================
+// REPORT COMMAND
+// ============================================================================
+
+func reportCommand(args []string) {
+ var scenarioID, resultsDir, outputFile string
+ resultsDir = "./results"
+
+ for _, arg := range args {
+ switch {
+ case strings.HasPrefix(arg, "--scenario="):
+ scenarioID = strings.TrimPrefix(arg, "--scenario=")
+ case strings.HasPrefix(arg, "--results-dir="):
+ resultsDir = strings.TrimPrefix(arg, "--results-dir=")
+ case strings.HasPrefix(arg, "--output="):
+ outputFile = strings.TrimPrefix(arg, "--output=")
+ }
+ }
+
+ if scenarioID == "" {
+ log.Fatal("--scenario is required for report command")
+ }
+
+ report, err := generateScenarioReport(scenarioID, resultsDir)
+ if err != nil {
+ log.Fatalf("Failed to generate report: %v", err)
+ }
+
+ output := renderScenarioReport(report)
+
+ if outputFile != "" {
+ if err := os.WriteFile(outputFile, []byte(output), 0644); err != nil {
+ log.Fatalf("Failed to write output file: %v", err)
+ }
+ log.Printf("Report written to %s", outputFile)
+ } else {
+ fmt.Println(output)
+ }
+}
+
+// PrometheusResponse represents a Prometheus API response for report parsing.
+type PrometheusResponse struct {
+ Status string `json:"status"`
+ Data struct {
+ ResultType string `json:"resultType"`
+ Result []struct {
+ Metric map[string]string `json:"metric"`
+ Value []interface{} `json:"value"`
+ } `json:"result"`
+ } `json:"data"`
+}
+
+// MetricComparison represents the comparison of a single metric.
+type MetricComparison struct {
+ Name string
+ DisplayName string
+ Unit string
+ IsCounter bool
+ OldValue float64
+ NewValue float64
+ Expected float64
+ Difference float64
+ DiffPct float64
+ Status string
+ Threshold float64
+ OldMeetsExpected string
+ NewMeetsExpected string
+}
+
+type metricInfo struct {
+ unit string
+ isCounter bool
+}
+
+var metricInfoMap = map[string]metricInfo{
+ "reconcile_total": {unit: "count", isCounter: true},
+ "reconcile_duration_p50": {unit: "s", isCounter: false},
+ "reconcile_duration_p95": {unit: "s", isCounter: false},
+ "reconcile_duration_p99": {unit: "s", isCounter: false},
+ "action_total": {unit: "count", isCounter: true},
+ "action_latency_p50": {unit: "s", isCounter: false},
+ "action_latency_p95": {unit: "s", isCounter: false},
+ "action_latency_p99": {unit: "s", isCounter: false},
+ "errors_total": {unit: "count", isCounter: true},
+ "reload_executed_total": {unit: "count", isCounter: true},
+ "workloads_scanned_total": {unit: "count", isCounter: true},
+ "workloads_matched_total": {unit: "count", isCounter: true},
+ "skipped_total_no_data_change": {unit: "count", isCounter: true},
+ "rest_client_requests_total": {unit: "count", isCounter: true},
+ "rest_client_requests_get": {unit: "count", isCounter: true},
+ "rest_client_requests_patch": {unit: "count", isCounter: true},
+ "rest_client_requests_put": {unit: "count", isCounter: true},
+ "rest_client_requests_errors": {unit: "count", isCounter: true},
+
+ // Resource consumption metrics (gauges, not counters)
+ "memory_rss_mb_avg": {unit: "MB", isCounter: false},
+ "memory_rss_mb_max": {unit: "MB", isCounter: false},
+ "memory_heap_mb_avg": {unit: "MB", isCounter: false},
+ "memory_heap_mb_max": {unit: "MB", isCounter: false},
+ "cpu_cores_avg": {unit: "cores", isCounter: false},
+ "cpu_cores_max": {unit: "cores", isCounter: false},
+ "goroutines_avg": {unit: "count", isCounter: false},
+ "goroutines_max": {unit: "count", isCounter: false},
+ "gc_pause_p99_ms": {unit: "ms", isCounter: false},
+}
+
+// ReportExpectedMetrics matches the expected metrics from test scenarios.
+type ReportExpectedMetrics struct {
+ ActionTotal int `json:"action_total"`
+ ReloadExecutedTotal int `json:"reload_executed_total"`
+ ReconcileTotal int `json:"reconcile_total"`
+ WorkloadsScannedTotal int `json:"workloads_scanned_total"`
+ WorkloadsMatchedTotal int `json:"workloads_matched_total"`
+ SkippedTotal int `json:"skipped_total"`
+ Description string `json:"description"`
+}
+
+// ScenarioReport represents the full report for a scenario.
+type ScenarioReport struct {
+ Scenario string
+ Timestamp time.Time
+ Comparisons []MetricComparison
+ OverallStatus string
+ Summary string
+ PassCriteria []string
+ FailedCriteria []string
+ Expected ReportExpectedMetrics
+ TestDescription string
+}
+
+// MetricType defines how to evaluate a metric.
+type MetricType int
+
+const (
+ LowerIsBetter MetricType = iota
+ ShouldMatch
+ HigherIsBetter
+ Informational // Reports values but doesn't affect pass/fail
+)
+
+type ThresholdConfig struct {
+ maxDiff float64
+ metricType MetricType
+ minAbsDiff float64
+}
+
+var thresholds = map[string]ThresholdConfig{
+ "reconcile_total": {maxDiff: 60.0, metricType: LowerIsBetter},
+ "reconcile_duration_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5},
+ "reconcile_duration_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
+ "reconcile_duration_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
+ "action_latency_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5},
+ "action_latency_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
+ "action_latency_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
+ "errors_total": {maxDiff: 0.0, metricType: LowerIsBetter},
+ "action_total": {maxDiff: 15.0, metricType: ShouldMatch},
+ "reload_executed_total": {maxDiff: 15.0, metricType: ShouldMatch},
+ "workloads_scanned_total": {maxDiff: 15.0, metricType: ShouldMatch},
+ "workloads_matched_total": {maxDiff: 15.0, metricType: ShouldMatch},
+ "skipped_total_no_data_change": {maxDiff: 20.0, metricType: ShouldMatch},
+ // API metrics - use minAbsDiff to allow small differences
+ "rest_client_requests_total": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
+ "rest_client_requests_get": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
+ "rest_client_requests_patch": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
+ "rest_client_requests_put": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 20},
+ "rest_client_requests_errors": {maxDiff: 0.0, metricType: LowerIsBetter, minAbsDiff: 100}, // Pass while the absolute difference stays below 100
+
+ // Resource consumption metrics
+ "memory_rss_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, // 50% or 20MB
+ "memory_rss_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 30}, // 50% or 30MB
+ "memory_heap_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 15}, // 50% or 15MB
+ "memory_heap_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, // 50% or 20MB
+ "cpu_cores_avg": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.1}, // 100% or 0.1 cores
+ "cpu_cores_max": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.2}, // 100% or 0.2 cores
+ "goroutines_avg": {metricType: Informational}, // Info only - different architectures may use more goroutines
+ "goroutines_max": {metricType: Informational}, // Info only - different architectures may use more goroutines
+ "gc_pause_p99_ms": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 5}, // 100% or 5ms
+}
+
+func generateScenarioReport(scenario, resultsDir string) (*ScenarioReport, error) {
+ oldDir := filepath.Join(resultsDir, scenario, "old")
+ newDir := filepath.Join(resultsDir, scenario, "new")
+ scenarioDir := filepath.Join(resultsDir, scenario)
+
+ // Check which directories exist to determine mode
+ _, oldErr := os.Stat(oldDir)
+ _, newErr := os.Stat(newDir)
+ hasOld := oldErr == nil
+ hasNew := newErr == nil
+ isComparison := hasOld && hasNew
+
+ // For single-version mode, determine which version we have
+ singleVersion := ""
+ singleDir := ""
+ if !isComparison {
+ if hasNew {
+ singleVersion = "new"
+ singleDir = newDir
+ } else if hasOld {
+ singleVersion = "old"
+ singleDir = oldDir
+ } else {
+ return nil, fmt.Errorf("no results found in %s", scenarioDir)
+ }
+ }
+
+ report := &ScenarioReport{
+ Scenario: scenario,
+ Timestamp: time.Now(),
+ }
+
+ // Load expected metrics
+ expectedPath := filepath.Join(scenarioDir, "expected.json")
+ if data, err := os.ReadFile(expectedPath); err == nil {
+ if err := json.Unmarshal(data, &report.Expected); err != nil {
+ log.Printf("Warning: Could not parse expected metrics: %v", err)
+ } else {
+ report.TestDescription = report.Expected.Description
+ }
+ }
+
+ // Handle single-version mode
+ if !isComparison {
+ return generateSingleVersionReport(report, singleDir, singleVersion, scenario)
+ }
+
+ // Define metrics to compare
+ metricsToCompare := []struct {
+ name string
+ file string
+ selector func(data PrometheusResponse) float64
+ }{
+ {"reconcile_total", "reloader_reconcile_total.json", sumAllValues},
+ {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue},
+ {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue},
+ {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue},
+ {"action_total", "reloader_action_total.json", sumAllValues},
+ {"action_latency_p50", "action_p50.json", getFirstValue},
+ {"action_latency_p95", "action_p95.json", getFirstValue},
+ {"action_latency_p99", "action_p99.json", getFirstValue},
+ {"errors_total", "reloader_errors_total.json", sumAllValues},
+ {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues},
+ {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues},
+ {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues},
+ {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue},
+ {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue},
+ {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue},
+ {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue},
+ {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue},
+
+ // Resource consumption metrics
+ {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB},
+ {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB},
+ {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB},
+ {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB},
+ {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue},
+ {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue},
+ {"goroutines_avg", "goroutines_avg.json", getFirstValue},
+ {"goroutines_max", "goroutines_max.json", getFirstValue},
+ {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs},
+ }
+
+ // Build expected values map
+ expectedValues := map[string]float64{
+ "action_total": float64(report.Expected.ActionTotal),
+ "reload_executed_total": float64(report.Expected.ReloadExecutedTotal),
+ "reconcile_total": float64(report.Expected.ReconcileTotal),
+ "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal),
+ "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal),
+ "skipped_total": float64(report.Expected.SkippedTotal),
+ }
+
+ // First pass: collect all metric values
+ metricValues := make(map[string]struct{ old, new, expected float64 })
+
+ for _, m := range metricsToCompare {
+ oldData, err := loadMetricFile(filepath.Join(oldDir, m.file))
+ if err != nil {
+ log.Printf("Warning: Could not load old metric %s: %v", m.name, err)
+ continue
+ }
+
+ newData, err := loadMetricFile(filepath.Join(newDir, m.file))
+ if err != nil {
+ log.Printf("Warning: Could not load new metric %s: %v", m.name, err)
+ continue
+ }
+
+ oldValue := m.selector(oldData)
+ newValue := m.selector(newData)
+ expected := expectedValues[m.name]
+
+ metricValues[m.name] = struct{ old, new, expected float64 }{oldValue, newValue, expected}
+ }
+
+ // Check context for smart pass/fail decisions
+ newMeetsActionExpected := false
+ newReconcileIsZero := false
+ isChurnScenario := scenario == "S5" // Workload churn has special pass/fail rules
+ if v, ok := metricValues["action_total"]; ok && v.expected > 0 {
+ tolerance := v.expected * 0.15
+ newMeetsActionExpected = math.Abs(v.new-v.expected) <= tolerance
+ }
+ if v, ok := metricValues["reconcile_total"]; ok {
+ newReconcileIsZero = v.new == 0
+ }
+
+ // Second pass: generate comparisons with context awareness
+ for _, m := range metricsToCompare {
+ v, ok := metricValues[m.name]
+ if !ok {
+ continue
+ }
+
+ comparison := compareMetricWithExpected(m.name, v.old, v.new, v.expected)
+
+ // Context-aware adjustments for API metrics
+ if strings.HasPrefix(m.name, "rest_client_requests") {
+ // If new correctly processed all expected reloads but old didn't,
+ // higher API calls in new is expected (it's doing the work correctly)
+ if newMeetsActionExpected && comparison.Status != "pass" {
+ if oldMeets, ok := metricValues["action_total"]; ok {
+ oldTolerance := oldMeets.expected * 0.15
+ oldMissed := math.Abs(oldMeets.old-oldMeets.expected) > oldTolerance
+ if oldMissed {
+ comparison.Status = "pass"
+ }
+ }
+ }
+ // If new has 0 reconciles (no-op scenario), API differences are fine
+ if newReconcileIsZero && comparison.Status != "pass" {
+ comparison.Status = "pass"
+ }
+ }
+
+ // S5 (Workload Churn) specific adjustments:
+ // - "Not found" errors are expected when deployments are deleted during processing
+ // - No expected values for throughput, so compare old vs new (should be similar)
+ if isChurnScenario {
+ if m.name == "errors_total" {
+ // In churn scenarios, "not found" errors are expected when workloads
+ // are deleted while Reloader is processing them. Allow up to 50 errors.
+ if v.new < 50 && v.old < 50 {
+ comparison.Status = "pass"
+ } else if v.new <= v.old*1.5 {
+ // Also pass if new has similar or fewer errors than old
+ comparison.Status = "pass"
+ }
+ }
+ if m.name == "action_total" || m.name == "reload_executed_total" {
+ // No expected value for churn - compare old vs new
+ // Both should be similar (within 20% of each other)
+ if v.old > 0 {
+ diff := math.Abs(v.new-v.old) / v.old * 100
+ if diff <= 20 {
+ comparison.Status = "pass"
+ }
+ } else if v.new > 0 {
+ // Old is 0, new has value - that's fine
+ comparison.Status = "pass"
+ }
+ }
+ }
+
+ report.Comparisons = append(report.Comparisons, comparison)
+
+ if comparison.Status == "pass" {
+ report.PassCriteria = append(report.PassCriteria, m.name)
+ } else if comparison.Status == "fail" {
+ report.FailedCriteria = append(report.FailedCriteria, m.name)
+ }
+ }
+
+ // Determine overall status
+ if len(report.FailedCriteria) == 0 {
+ report.OverallStatus = "PASS"
+ report.Summary = "All metrics within acceptable thresholds"
+ } else {
+ report.OverallStatus = "FAIL"
+ report.Summary = fmt.Sprintf("%d metrics failed: %s",
+ len(report.FailedCriteria),
+ strings.Join(report.FailedCriteria, ", "))
+ }
+
+ return report, nil
+}
+
+// generateSingleVersionReport creates a report for a single version (no comparison).
+func generateSingleVersionReport(report *ScenarioReport, dataDir, version, scenario string) (*ScenarioReport, error) {
+ // Define metrics to collect
+ metricsToCollect := []struct {
+ name string
+ file string
+ selector func(data PrometheusResponse) float64
+ }{
+ {"reconcile_total", "reloader_reconcile_total.json", sumAllValues},
+ {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue},
+ {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue},
+ {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue},
+ {"action_total", "reloader_action_total.json", sumAllValues},
+ {"action_latency_p50", "action_p50.json", getFirstValue},
+ {"action_latency_p95", "action_p95.json", getFirstValue},
+ {"action_latency_p99", "action_p99.json", getFirstValue},
+ {"errors_total", "reloader_errors_total.json", sumAllValues},
+ {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues},
+ {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues},
+ {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues},
+ {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue},
+ {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue},
+ {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue},
+ {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue},
+ {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue},
+ {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB},
+ {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB},
+ {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB},
+ {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB},
+ {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue},
+ {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue},
+ {"goroutines_avg", "goroutines_avg.json", getFirstValue},
+ {"goroutines_max", "goroutines_max.json", getFirstValue},
+ {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs},
+ }
+
+ // Build expected values map
+ expectedValues := map[string]float64{
+ "action_total": float64(report.Expected.ActionTotal),
+ "reload_executed_total": float64(report.Expected.ReloadExecutedTotal),
+ "reconcile_total": float64(report.Expected.ReconcileTotal),
+ "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal),
+ "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal),
+ "skipped_total": float64(report.Expected.SkippedTotal),
+ }
+
+ for _, m := range metricsToCollect {
+ data, err := loadMetricFile(filepath.Join(dataDir, m.file))
+ if err != nil {
+ log.Printf("Warning: Could not load metric %s: %v", m.name, err)
+ continue
+ }
+
+ value := m.selector(data)
+ expected := expectedValues[m.name]
+
+ info := metricInfoMap[m.name]
+ if info.unit == "" {
+ info = metricInfo{unit: "count", isCounter: true}
+ }
+
+ displayName := m.name
+ if info.unit != "count" {
+ displayName = fmt.Sprintf("%s (%s)", m.name, info.unit)
+ }
+
+ // For single-version, put the value in NewValue column
+ status := "info"
+ meetsExp := "-"
+
+ // Check against expected if available
+ if expected > 0 {
+ meetsExp = meetsExpected(value, expected)
+ threshold, ok := thresholds[m.name]
+ if ok && threshold.metricType == ShouldMatch {
+ if meetsExp == "✓" {
+ status = "pass"
+ report.PassCriteria = append(report.PassCriteria, m.name)
+ } else {
+ status = "fail"
+ report.FailedCriteria = append(report.FailedCriteria, m.name)
+ }
+ }
+ }
+
+ if info.isCounter {
+ value = math.Round(value)
+ }
+
+ report.Comparisons = append(report.Comparisons, MetricComparison{
+ Name: m.name,
+ DisplayName: displayName,
+ Unit: info.unit,
+ IsCounter: info.isCounter,
+ OldValue: 0, // No old value in single-version mode
+ NewValue: value,
+ Expected: expected,
+ OldMeetsExpected: "-",
+ NewMeetsExpected: meetsExp,
+ Status: status,
+ })
+ }
+
+ if len(report.FailedCriteria) == 0 {
+ report.OverallStatus = "PASS"
+ report.Summary = fmt.Sprintf("Single-version test (%s) completed successfully", version)
+ } else {
+ report.OverallStatus = "FAIL"
+ report.Summary = fmt.Sprintf("%d metrics failed: %s",
+ len(report.FailedCriteria),
+ strings.Join(report.FailedCriteria, ", "))
+ }
+
+ return report, nil
+}
+
+func loadMetricFile(path string) (PrometheusResponse, error) {
+ var resp PrometheusResponse
+ data, err := os.ReadFile(path)
+ if err != nil {
+ return resp, err
+ }
+ err = json.Unmarshal(data, &resp)
+ return resp, err
+}
+
+func sumAllValues(data PrometheusResponse) float64 {
+ var sum float64
+ for _, result := range data.Data.Result {
+ if len(result.Value) >= 2 {
+ if v, ok := result.Value[1].(string); ok {
+ var f float64
+ fmt.Sscanf(v, "%f", &f)
+ sum += f
+ }
+ }
+ }
+ return sum
+}
+
+func sumSuccessValues(data PrometheusResponse) float64 {
+ var sum float64
+ for _, result := range data.Data.Result {
+ if result.Metric["success"] == "true" {
+ if len(result.Value) >= 2 {
+ if v, ok := result.Value[1].(string); ok {
+ var f float64
+ fmt.Sscanf(v, "%f", &f)
+ sum += f
+ }
+ }
+ }
+ }
+ return sum
+}
+
+func getFirstValue(data PrometheusResponse) float64 {
+ if len(data.Data.Result) > 0 && len(data.Data.Result[0].Value) >= 2 {
+ if v, ok := data.Data.Result[0].Value[1].(string); ok {
+ var f float64
+ fmt.Sscanf(v, "%f", &f)
+ return f
+ }
+ }
+ return 0
+}
+
+// bytesToMB converts bytes to megabytes.
+func bytesToMB(data PrometheusResponse) float64 {
+ bytes := getFirstValue(data)
+ return bytes / (1024 * 1024)
+}
+
+// secondsToMs converts seconds to milliseconds.
+func secondsToMs(data PrometheusResponse) float64 {
+ seconds := getFirstValue(data)
+ return seconds * 1000
+}
+
+func meetsExpected(value, expected float64) string {
+ if expected == 0 {
+ return "-"
+ }
+ tolerance := expected * 0.15
+ if math.Abs(value-expected) <= tolerance {
+ return "✓"
+ }
+ return "✗"
+}
+
+func compareMetricWithExpected(name string, oldValue, newValue, expected float64) MetricComparison {
+ diff := newValue - oldValue
+ absDiff := math.Abs(diff)
+ var diffPct float64
+ if oldValue != 0 {
+ diffPct = (diff / oldValue) * 100
+ } else if newValue != 0 {
+ diffPct = 100
+ }
+
+ threshold, ok := thresholds[name]
+ if !ok {
+ threshold = ThresholdConfig{maxDiff: 10.0, metricType: ShouldMatch}
+ }
+
+ info := metricInfoMap[name]
+ if info.unit == "" {
+ info = metricInfo{unit: "count", isCounter: true}
+ }
+ displayName := name
+ if info.unit != "count" {
+ displayName = fmt.Sprintf("%s (%s)", name, info.unit)
+ }
+
+ if info.isCounter {
+ oldValue = math.Round(oldValue)
+ newValue = math.Round(newValue)
+ }
+
+ status := "pass"
+ oldMeetsExp := meetsExpected(oldValue, expected)
+ newMeetsExp := meetsExpected(newValue, expected)
+
+ if expected > 0 && threshold.metricType == ShouldMatch {
+ if newMeetsExp == "✗" {
+ status = "fail"
+ }
+ } else {
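+ // No expected-value check applies here, so fall back to comparing old vs. new against the
+ // thresholds. A configured minAbsDiff lets small absolute deltas pass even when the
+ // relative change looks large.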
+ switch threshold.metricType {
+ case LowerIsBetter:
+ if threshold.minAbsDiff > 0 && absDiff < threshold.minAbsDiff {
+ status = "pass"
+ } else if diffPct > threshold.maxDiff {
+ status = "fail"
+ }
+ case HigherIsBetter:
+ if diffPct < -threshold.maxDiff {
+ status = "fail"
+ }
+ case ShouldMatch:
+ if math.Abs(diffPct) > threshold.maxDiff {
+ status = "fail"
+ }
+ case Informational:
+ status = "info"
+ }
+ }
+
+ return MetricComparison{
+ Name: name,
+ DisplayName: displayName,
+ Unit: info.unit,
+ IsCounter: info.isCounter,
+ Expected: expected,
+ OldMeetsExpected: oldMeetsExp,
+ NewMeetsExpected: newMeetsExp,
+ OldValue: oldValue,
+ NewValue: newValue,
+ Difference: diff,
+ DiffPct: diffPct,
+ Status: status,
+ Threshold: threshold.maxDiff,
+ }
+}
+
+func renderScenarioReport(report *ScenarioReport) string {
+ var sb strings.Builder
+
+ // Detect single-version mode by checking if all OldValues are 0
+ isSingleVersion := true
+ for _, c := range report.Comparisons {
+ if c.OldValue != 0 {
+ isSingleVersion = false
+ break
+ }
+ }
+
+ sb.WriteString("\n")
+ sb.WriteString("================================================================================\n")
+ if isSingleVersion {
+ sb.WriteString(" RELOADER TEST REPORT\n")
+ } else {
+ sb.WriteString(" RELOADER A/B COMPARISON REPORT\n")
+ }
+ sb.WriteString("================================================================================\n\n")
+
+ fmt.Fprintf(&sb, "Scenario: %s\n", report.Scenario)
+ fmt.Fprintf(&sb, "Generated: %s\n", report.Timestamp.Format("2006-01-02 15:04:05"))
+ fmt.Fprintf(&sb, "Status: %s\n", report.OverallStatus)
+ fmt.Fprintf(&sb, "Summary: %s\n", report.Summary)
+
+ if report.TestDescription != "" {
+ fmt.Fprintf(&sb, "Test: %s\n", report.TestDescription)
+ }
+
+ if report.Expected.ActionTotal > 0 {
+ sb.WriteString("\n--------------------------------------------------------------------------------\n")
+ sb.WriteString(" EXPECTED VALUES\n")
+ sb.WriteString("--------------------------------------------------------------------------------\n")
+ fmt.Fprintf(&sb, "Expected Action Total: %d\n", report.Expected.ActionTotal)
+ fmt.Fprintf(&sb, "Expected Reload Executed Total: %d\n", report.Expected.ReloadExecutedTotal)
+ if report.Expected.SkippedTotal > 0 {
+ fmt.Fprintf(&sb, "Expected Skipped Total: %d\n", report.Expected.SkippedTotal)
+ }
+ }
+
+ sb.WriteString("\n--------------------------------------------------------------------------------\n")
+ if isSingleVersion {
+ sb.WriteString(" METRICS\n")
+ } else {
+ sb.WriteString(" METRIC COMPARISONS\n")
+ }
+ sb.WriteString("--------------------------------------------------------------------------------\n")
+
+ if isSingleVersion {
+ sb.WriteString("(✓ = meets expected value within 15%)\n\n")
+ fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n",
+ "Metric", "Value", "Expected", "Met?", "Status")
+ fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n",
+ "------", "-----", "--------", "----", "------")
+
+ for _, c := range report.Comparisons {
+ if c.IsCounter {
+ if c.Expected > 0 {
+ fmt.Fprintf(&sb, "%-32s %12.0f %10.0f %5s %8s\n",
+ c.DisplayName, c.NewValue, c.Expected,
+ c.NewMeetsExpected, c.Status)
+ } else {
+ fmt.Fprintf(&sb, "%-32s %12.0f %10s %5s %8s\n",
+ c.DisplayName, c.NewValue, "-",
+ c.NewMeetsExpected, c.Status)
+ }
+ } else {
+ fmt.Fprintf(&sb, "%-32s %12.4f %10s %5s %8s\n",
+ c.DisplayName, c.NewValue, "-",
+ c.NewMeetsExpected, c.Status)
+ }
+ }
+ } else {
+ sb.WriteString("(Old✓/New✓ = meets expected value within 15%)\n\n")
+
+ fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n",
+ "Metric", "Old", "New", "Expected", "Old✓", "New✓", "Status")
+ fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n",
+ "------", "---", "---", "--------", "----", "----", "------")
+
+ for _, c := range report.Comparisons {
+ if c.IsCounter {
+ if c.Expected > 0 {
+ fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10.0f %5s %5s %8s\n",
+ c.DisplayName, c.OldValue, c.NewValue, c.Expected,
+ c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
+ } else {
+ fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10s %5s %5s %8s\n",
+ c.DisplayName, c.OldValue, c.NewValue, "-",
+ c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
+ }
+ } else {
+ fmt.Fprintf(&sb, "%-32s %12.4f %12.4f %10s %5s %5s %8s\n",
+ c.DisplayName, c.OldValue, c.NewValue, "-",
+ c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
+ }
+ }
+ }
+
+ sb.WriteString("\n--------------------------------------------------------------------------------\n")
+ sb.WriteString(" PASS/FAIL CRITERIA\n")
+ sb.WriteString("--------------------------------------------------------------------------------\n\n")
+
+ fmt.Fprintf(&sb, "Passed (%d):\n", len(report.PassCriteria))
+ for _, p := range report.PassCriteria {
+ fmt.Fprintf(&sb, " ✓ %s\n", p)
+ }
+
+ if len(report.FailedCriteria) > 0 {
+ fmt.Fprintf(&sb, "\nFailed (%d):\n", len(report.FailedCriteria))
+ for _, f := range report.FailedCriteria {
+ fmt.Fprintf(&sb, " ✗ %s\n", f)
+ }
+ }
+
+ sb.WriteString("\n--------------------------------------------------------------------------------\n")
+ sb.WriteString(" THRESHOLDS USED\n")
+ sb.WriteString("--------------------------------------------------------------------------------\n\n")
+
+ fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n",
+ "Metric", "Max Diff%", "Min Abs Diff", "Direction")
+ fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n",
+ "------", "---------", "------------", "---------")
+
+ // Sort threshold names
+ var names []string
+ for name := range thresholds {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+
+ for _, name := range names {
+ t := thresholds[name]
+ var direction string
+ switch t.metricType {
+ case LowerIsBetter:
+ direction = "lower is better"
+ case HigherIsBetter:
+ direction = "higher is better"
+ case ShouldMatch:
+ direction = "should match"
+ case Informational:
+ direction = "info only"
+ }
+ minAbsDiff := "-"
+ if t.minAbsDiff > 0 {
+ minAbsDiff = fmt.Sprintf("%.1f", t.minAbsDiff) // unit varies per metric, so print the bare number
+ }
+ fmt.Fprintf(&sb, "%-35s %9.1f%% %15s %18s\n",
+ name, t.maxDiff, minAbsDiff, direction)
+ }
+
+ sb.WriteString("\n================================================================================\n")
+
+ return sb.String()
+}
diff --git a/test/loadtest/go.mod b/test/loadtest/go.mod
new file mode 100644
index 0000000..ed52882
--- /dev/null
+++ b/test/loadtest/go.mod
@@ -0,0 +1,50 @@
+module github.com/stakater/Reloader/test/loadtest
+
+go 1.22.0
+
+require (
+ k8s.io/api v0.31.0
+ k8s.io/apimachinery v0.31.0
+ k8s.io/client-go v0.31.0
+)
+
+require (
+ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+ github.com/emicklei/go-restful/v3 v3.11.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+ github.com/go-logr/logr v1.4.2 // indirect
+ github.com/go-openapi/jsonpointer v0.19.6 // indirect
+ github.com/go-openapi/jsonreference v0.20.2 // indirect
+ github.com/go-openapi/swag v0.22.4 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
+ github.com/golang/protobuf v1.5.4 // indirect
+ github.com/google/gnostic-models v0.6.8 // indirect
+ github.com/google/go-cmp v0.6.0 // indirect
+ github.com/google/gofuzz v1.2.0 // indirect
+ github.com/google/uuid v1.6.0 // indirect
+ github.com/imdario/mergo v0.3.6 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
+ github.com/json-iterator/go v1.1.12 // indirect
+ github.com/mailru/easyjson v0.7.7 // indirect
+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+ github.com/modern-go/reflect2 v1.0.2 // indirect
+ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
+ github.com/spf13/pflag v1.0.5 // indirect
+ github.com/x448/float16 v0.8.4 // indirect
+ golang.org/x/net v0.26.0 // indirect
+ golang.org/x/oauth2 v0.21.0 // indirect
+ golang.org/x/sys v0.21.0 // indirect
+ golang.org/x/term v0.21.0 // indirect
+ golang.org/x/text v0.16.0 // indirect
+ golang.org/x/time v0.3.0 // indirect
+ google.golang.org/protobuf v1.34.2 // indirect
+ gopkg.in/inf.v0 v0.9.1 // indirect
+ gopkg.in/yaml.v2 v2.4.0 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
+ k8s.io/klog/v2 v2.130.1 // indirect
+ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+ k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
+ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
+ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
+ sigs.k8s.io/yaml v1.4.0 // indirect
+)
diff --git a/test/loadtest/go.sum b/test/loadtest/go.sum
new file mode 100644
index 0000000..a8edbda
--- /dev/null
+++ b/test/loadtest/go.sum
@@ -0,0 +1,154 @@
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
+github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
+github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
+github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
+github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
+github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
+github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
+github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
+github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
+github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw=
+github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
+golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
+golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
+golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
+golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
+golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
+golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
+golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo=
+k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE=
+k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc=
+k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
+k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/test/loadtest/internal/cluster/kind.go b/test/loadtest/internal/cluster/kind.go
new file mode 100644
index 0000000..bcd5e19
--- /dev/null
+++ b/test/loadtest/internal/cluster/kind.go
@@ -0,0 +1,313 @@
+// Package cluster provides kind cluster management functionality.
+package cluster
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "os"
+ "os/exec"
+ "strings"
+ "time"
+)
+
+// Config holds configuration for kind cluster operations.
+type Config struct {
+ Name string
+ ContainerRuntime string // "docker" or "podman"
+ PortOffset int // Offset for host port mappings (for parallel clusters)
+}
+
+// Manager handles kind cluster operations.
+type Manager struct {
+ cfg Config
+}
+
+// NewManager creates a new cluster manager.
+func NewManager(cfg Config) *Manager {
+ return &Manager{cfg: cfg}
+}
+
+// DetectContainerRuntime returns the first available container runtime, preferring podman over docker.
+func DetectContainerRuntime() (string, error) {
+ if _, err := exec.LookPath("podman"); err == nil {
+ return "podman", nil
+ }
+ if _, err := exec.LookPath("docker"); err == nil {
+ return "docker", nil
+ }
+ return "", fmt.Errorf("neither docker nor podman found in PATH")
+}
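+
+// Example wiring (sketch; the cluster name and image tag are illustrative):
+//
+//	runtime, err := DetectContainerRuntime()
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	mgr := NewManager(Config{Name: "reloader-loadtest", ContainerRuntime: runtime})
+//	if err := mgr.Create(ctx); err != nil {
+//		log.Fatal(err)
+//	}
+//	if err := mgr.LoadImage(ctx, "localhost/reloader:new"); err != nil {
+//		log.Fatal(err)
+//	}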
+
+// Exists checks if the cluster already exists.
+func (m *Manager) Exists() bool {
+ cmd := exec.Command("kind", "get", "clusters")
+ out, err := cmd.Output()
+ if err != nil {
+ return false
+ }
+ for _, line := range strings.Split(string(out), "\n") {
+ if strings.TrimSpace(line) == m.cfg.Name {
+ return true
+ }
+ }
+ return false
+}
+
+// Delete deletes the kind cluster.
+func (m *Manager) Delete(ctx context.Context) error {
+ cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", m.cfg.Name)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ return cmd.Run()
+}
+
+// Create creates a new kind cluster with optimized settings.
+func (m *Manager) Create(ctx context.Context) error {
+ if m.cfg.ContainerRuntime == "podman" {
+ os.Setenv("KIND_EXPERIMENTAL_PROVIDER", "podman")
+ }
+
+ if m.Exists() {
+ fmt.Printf("Cluster %s already exists, deleting...\n", m.cfg.Name)
+ if err := m.Delete(ctx); err != nil {
+ return fmt.Errorf("deleting existing cluster: %w", err)
+ }
+ }
+
+ // Calculate unique ports based on offset (for parallel clusters)
+ httpPort := 8080 + m.cfg.PortOffset
+ httpsPort := 8443 + m.cfg.PortOffset
+
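+ // The generated config below provisions one control-plane node and six workers, raises
+ // max-pods on the workers, and bumps kubelet/API-server QPS, burst, and inflight limits
+ // so cluster-side throttling does not skew the load test; only the two extraPortMappings
+ // host ports vary with PortOffset.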
+ config := fmt.Sprintf(`kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+networking:
+ podSubnet: "10.244.0.0/16"
+ serviceSubnet: "10.96.0.0/16"
+nodes:
+- role: control-plane
+ kubeadmConfigPatches:
+ - |
+ kind: InitConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ node-labels: "ingress-ready=true"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+ - |
+ kind: ClusterConfiguration
+ apiServer:
+ extraArgs:
+ max-requests-inflight: "800"
+ max-mutating-requests-inflight: "400"
+ watch-cache-sizes: "configmaps#1000,secrets#1000,pods#1000"
+ controllerManager:
+ extraArgs:
+ kube-api-qps: "200"
+ kube-api-burst: "200"
+ scheduler:
+ extraArgs:
+ kube-api-qps: "200"
+ kube-api-burst: "200"
+ extraPortMappings:
+ - containerPort: 80
+ hostPort: %d
+ protocol: TCP
+ - containerPort: 443
+ hostPort: %d
+ protocol: TCP
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+- role: worker
+ kubeadmConfigPatches:
+ - |
+ kind: JoinConfiguration
+ nodeRegistration:
+ kubeletExtraArgs:
+ max-pods: "250"
+ kube-api-qps: "50"
+ kube-api-burst: "100"
+ serialize-image-pulls: "false"
+ event-qps: "50"
+ event-burst: "100"
+`, httpPort, httpsPort)
+ cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", m.cfg.Name, "--config=-")
+ cmd.Stdin = strings.NewReader(config)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ return cmd.Run()
+}
+
+// GetKubeconfig returns the kubeconfig for the cluster.
+func (m *Manager) GetKubeconfig() (string, error) {
+ cmd := exec.Command("kind", "get", "kubeconfig", "--name", m.cfg.Name)
+ out, err := cmd.Output()
+ if err != nil {
+ return "", fmt.Errorf("getting kubeconfig: %w", err)
+ }
+ return string(out), nil
+}
+
+// Context returns the kubectl context name for this cluster.
+func (m *Manager) Context() string {
+ return "kind-" + m.cfg.Name
+}
+
+// Name returns the cluster name.
+func (m *Manager) Name() string {
+ return m.cfg.Name
+}
+
+// LoadImage loads a container image into the kind cluster.
+func (m *Manager) LoadImage(ctx context.Context, image string) error {
+ // First check if image exists locally
+ if !m.imageExistsLocally(image) {
+ fmt.Printf(" Image not found locally, pulling: %s\n", image)
+ pullCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
+ pullCmd.Stdout = os.Stdout
+ pullCmd.Stderr = os.Stderr
+ if err := pullCmd.Run(); err != nil {
+ return fmt.Errorf("pulling image %s: %w", image, err)
+ }
+ } else {
+ fmt.Printf(" Image found locally: %s\n", image)
+ }
+
+ fmt.Printf(" Copying image to kind cluster...\n")
+
+ if m.cfg.ContainerRuntime == "podman" {
+ // For podman, save to archive and load
+ tmpFile := fmt.Sprintf("/tmp/kind-image-%d.tar", time.Now().UnixNano())
+ defer os.Remove(tmpFile)
+
+ saveCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "save", image, "-o", tmpFile)
+ if err := saveCmd.Run(); err != nil {
+ return fmt.Errorf("saving image %s: %w", image, err)
+ }
+
+ loadCmd := exec.CommandContext(ctx, "kind", "load", "image-archive", tmpFile, "--name", m.cfg.Name)
+ loadCmd.Stdout = os.Stdout
+ loadCmd.Stderr = os.Stderr
+ if err := loadCmd.Run(); err != nil {
+ return fmt.Errorf("loading image archive: %w", err)
+ }
+ } else {
+ loadCmd := exec.CommandContext(ctx, "kind", "load", "docker-image", image, "--name", m.cfg.Name)
+ loadCmd.Stdout = os.Stdout
+ loadCmd.Stderr = os.Stderr
+ if err := loadCmd.Run(); err != nil {
+ return fmt.Errorf("loading image %s: %w", image, err)
+ }
+ }
+
+ return nil
+}
+
+// imageExistsLocally checks if an image exists in the local container runtime.
+func (m *Manager) imageExistsLocally(image string) bool {
+ // Try "image exists" command (works for podman)
+ cmd := exec.Command(m.cfg.ContainerRuntime, "image", "exists", image)
+ if err := cmd.Run(); err == nil {
+ return true
+ }
+
+ // Try "image inspect" (works for both docker and podman)
+ cmd = exec.Command(m.cfg.ContainerRuntime, "image", "inspect", image)
+ if err := cmd.Run(); err == nil {
+ return true
+ }
+
+ // Try listing images and grep
+ cmd = exec.Command(m.cfg.ContainerRuntime, "images", "--format", "{{.Repository}}:{{.Tag}}")
+ out, err := cmd.Output()
+ if err == nil {
+ for _, line := range strings.Split(string(out), "\n") {
+ if strings.TrimSpace(line) == image {
+ return true
+ }
+ }
+ }
+
+ return false
+}
+
+// PullImage pulls an image using the container runtime.
+func (m *Manager) PullImage(ctx context.Context, image string) error {
+ cmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ return cmd.Run()
+}
+
+// ExecKubectl runs a kubectl command against the cluster.
+func (m *Manager) ExecKubectl(ctx context.Context, args ...string) ([]byte, error) {
+ cmd := exec.CommandContext(ctx, "kubectl", args...)
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+ if err := cmd.Run(); err != nil {
+ return nil, fmt.Errorf("%w: %s", err, stderr.String())
+ }
+ return stdout.Bytes(), nil
+}
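+
+// Example (sketch): scope kubectl to this cluster via its kind context.
+//
+//	out, err := mgr.ExecKubectl(ctx, "--context", mgr.Context(), "get", "pods", "-A")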
diff --git a/test/loadtest/internal/prometheus/prometheus.go b/test/loadtest/internal/prometheus/prometheus.go
new file mode 100644
index 0000000..f16df78
--- /dev/null
+++ b/test/loadtest/internal/prometheus/prometheus.go
@@ -0,0 +1,452 @@
+// Package prometheus provides Prometheus deployment and querying functionality.
+package prometheus
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net"
+ "net/http"
+ "net/url"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strings"
+ "time"
+)
+
+// Manager handles Prometheus operations.
+type Manager struct {
+ manifestPath string
+ portForward *exec.Cmd
+ localPort int
+ kubeContext string // Optional: use specific kubeconfig context
+}
+
+// NewManager creates a new Prometheus manager.
+func NewManager(manifestPath string) *Manager {
+ return &Manager{
+ manifestPath: manifestPath,
+ localPort: 9091, // Use 9091 to avoid conflicts
+ }
+}
+
+// NewManagerWithPort creates a Prometheus manager with a custom local port and an optional kube context.
+func NewManagerWithPort(manifestPath string, port int, kubeContext string) *Manager {
+ return &Manager{
+ manifestPath: manifestPath,
+ localPort: port,
+ kubeContext: kubeContext,
+ }
+}
+
+// kubectl prepends the configured kube context, when set, to the given kubectl args.
+func (m *Manager) kubectl(args ...string) []string {
+ if m.kubeContext != "" {
+ return append([]string{"--context", m.kubeContext}, args...)
+ }
+ return args
+}
+
+// Deploy deploys Prometheus to the cluster.
+func (m *Manager) Deploy(ctx context.Context) error {
+ // Create namespace
+ cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("create", "namespace", "monitoring", "--dry-run=client", "-o", "yaml")...)
+ out, err := cmd.Output()
+ if err != nil {
+ return fmt.Errorf("generating namespace yaml: %w", err)
+ }
+
+ applyCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", "-")...)
+ applyCmd.Stdin = strings.NewReader(string(out))
+ if err := applyCmd.Run(); err != nil {
+ return fmt.Errorf("applying namespace: %w", err)
+ }
+
+ // Apply Prometheus manifest
+ applyCmd = exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", m.manifestPath)...)
+ applyCmd.Stdout = os.Stdout
+ applyCmd.Stderr = os.Stderr
+ if err := applyCmd.Run(); err != nil {
+ return fmt.Errorf("applying prometheus manifest: %w", err)
+ }
+
+ // Wait for Prometheus to be ready
+ fmt.Println("Waiting for Prometheus to be ready...")
+ waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
+ "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
+ waitCmd.Stdout = os.Stdout
+ waitCmd.Stderr = os.Stderr
+ if err := waitCmd.Run(); err != nil {
+ return fmt.Errorf("waiting for prometheus: %w", err)
+ }
+
+ return nil
+}
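+
+// Typical lifecycle (sketch; the manifest path, job name, and output directory are
+// illustrative, error handling elided):
+//
+//	pm := NewManager("config/prometheus.yaml")
+//	pm.Deploy(ctx)
+//	pm.StartPortForward(ctx)
+//	pm.WaitForTarget(ctx, "reloader-new", 2*time.Minute)
+//	// ... run a scenario ...
+//	pm.CollectMetrics(ctx, "reloader-new", "results/S1/new", "S1")
+//	pm.StopPortForward()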
+
+// StartPortForward starts port-forwarding to Prometheus.
+func (m *Manager) StartPortForward(ctx context.Context) error {
+ m.StopPortForward()
+
+ // Start port-forward
+ m.portForward = exec.CommandContext(ctx, "kubectl", m.kubectl("port-forward",
+ "-n", "monitoring", "svc/prometheus", fmt.Sprintf("%d:9090", m.localPort))...)
+
+ if err := m.portForward.Start(); err != nil {
+ return fmt.Errorf("starting port-forward: %w", err)
+ }
+
+ // Wait for port-forward to be ready
+ for i := 0; i < 30; i++ {
+ time.Sleep(time.Second)
+ if m.isAccessible() {
+ fmt.Printf("Prometheus accessible at http://localhost:%d\n", m.localPort)
+ return nil
+ }
+ }
+
+ return fmt.Errorf("prometheus port-forward not ready after 30s")
+}
+
+// StopPortForward stops the port-forward process.
+func (m *Manager) StopPortForward() {
+ if m.portForward != nil && m.portForward.Process != nil {
+ m.portForward.Process.Kill()
+ m.portForward = nil
+ }
+ // Also kill any lingering port-forwards
+ exec.Command("pkill", "-f", fmt.Sprintf("kubectl port-forward.*prometheus.*%d", m.localPort)).Run()
+}
+
+// Reset restarts Prometheus to clear all metrics.
+func (m *Manager) Reset(ctx context.Context) error {
+ m.StopPortForward()
+
+ // Delete Prometheus pod to reset metrics
+ cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("delete", "pod", "-n", "monitoring",
+ "-l", "app=prometheus", "--grace-period=0", "--force")...)
+ cmd.Run() // Ignore errors
+
+ // Wait for new pod
+ fmt.Println("Waiting for Prometheus to restart...")
+ waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
+ "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
+ if err := waitCmd.Run(); err != nil {
+ return fmt.Errorf("waiting for prometheus restart: %w", err)
+ }
+
+ // Restart port-forward
+ if err := m.StartPortForward(ctx); err != nil {
+ return err
+ }
+
+ // Wait for scraping to initialize
+ fmt.Println("Waiting 5s for Prometheus to initialize scraping...")
+ time.Sleep(5 * time.Second)
+
+ return nil
+}
+
+func (m *Manager) isAccessible() bool {
+ conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", m.localPort), 2*time.Second)
+ if err != nil {
+ return false
+ }
+ conn.Close()
+
+ // Also try HTTP
+ resp, err := http.Get(fmt.Sprintf("http://localhost:%d/api/v1/status/config", m.localPort))
+ if err != nil {
+ return false
+ }
+ resp.Body.Close()
+ return resp.StatusCode == 200
+}
+
+// URL returns the local Prometheus URL.
+func (m *Manager) URL() string {
+ return fmt.Sprintf("http://localhost:%d", m.localPort)
+}
+
+// WaitForTarget waits for a specific job to be scraped by Prometheus.
+func (m *Manager) WaitForTarget(ctx context.Context, job string, timeout time.Duration) error {
+ fmt.Printf("Waiting for Prometheus to discover and scrape job '%s'...\n", job)
+
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ if m.isTargetHealthy(job) {
+ fmt.Printf("Prometheus is scraping job '%s'\n", job)
+ return nil
+ }
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-time.After(2 * time.Second):
+ }
+ }
+
+ // Print debug info on timeout
+ m.printTargetStatus(job)
+ return fmt.Errorf("timeout waiting for Prometheus to scrape job '%s'", job)
+}
+
+// isTargetHealthy checks if a job has at least one healthy target.
+func (m *Manager) isTargetHealthy(job string) bool {
+ resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
+ if err != nil {
+ return false
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return false
+ }
+
+ var result struct {
+ Status string `json:"status"`
+ Data struct {
+ ActiveTargets []struct {
+ Labels map[string]string `json:"labels"`
+ Health string `json:"health"`
+ } `json:"activeTargets"`
+ } `json:"data"`
+ }
+
+ if err := json.Unmarshal(body, &result); err != nil {
+ return false
+ }
+
+ for _, target := range result.Data.ActiveTargets {
+ if target.Labels["job"] == job && target.Health == "up" {
+ return true
+ }
+ }
+ return false
+}
+
+// printTargetStatus prints debug info about targets.
+func (m *Manager) printTargetStatus(job string) {
+ resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
+ if err != nil {
+ fmt.Printf("Failed to get targets: %v\n", err)
+ return
+ }
+ defer resp.Body.Close()
+
+ body, _ := io.ReadAll(resp.Body)
+
+ var result struct {
+ Data struct {
+ ActiveTargets []struct {
+ Labels map[string]string `json:"labels"`
+ Health string `json:"health"`
+ LastError string `json:"lastError"`
+ ScrapeURL string `json:"scrapeUrl"`
+ } `json:"activeTargets"`
+ } `json:"data"`
+ }
+
+ if err := json.Unmarshal(body, &result); err != nil {
+ fmt.Printf("Failed to parse targets: %v\n", err)
+ return
+ }
+
+ fmt.Printf("Prometheus targets for job '%s':\n", job)
+ found := false
+ for _, target := range result.Data.ActiveTargets {
+ if target.Labels["job"] == job {
+ found = true
+ fmt.Printf(" - %s: health=%s, lastError=%s\n",
+ target.ScrapeURL, target.Health, target.LastError)
+ }
+ }
+ if !found {
+ fmt.Printf(" No targets found for job '%s'\n", job)
+ fmt.Printf(" Available jobs: ")
+ jobs := make(map[string]bool)
+ for _, target := range result.Data.ActiveTargets {
+ jobs[target.Labels["job"]] = true
+ }
+ for j := range jobs {
+ fmt.Printf("%s ", j)
+ }
+ fmt.Println()
+ }
+}
+
+// HasMetrics checks if the specified job has any metrics available.
+func (m *Manager) HasMetrics(ctx context.Context, job string) bool {
+ query := fmt.Sprintf(`up{job="%s"}`, job)
+ result, err := m.Query(ctx, query)
+ if err != nil {
+ return false
+ }
+ return len(result.Data.Result) > 0 && result.Data.Result[0].Value[1] == "1"
+}
+
+// QueryResponse represents a Prometheus query response.
+type QueryResponse struct {
+ Status string `json:"status"`
+ Data struct {
+ ResultType string `json:"resultType"`
+ Result []struct {
+ Metric map[string]string `json:"metric"`
+ Value []interface{} `json:"value"`
+ } `json:"result"`
+ } `json:"data"`
+}
+
+// Query executes a PromQL query and returns the response.
+func (m *Manager) Query(ctx context.Context, query string) (*QueryResponse, error) {
+ u := fmt.Sprintf("%s/api/v1/query?query=%s", m.URL(), url.QueryEscape(query))
+
+ req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ client := &http.Client{Timeout: 10 * time.Second}
+ resp, err := client.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("querying prometheus: %w", err)
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, fmt.Errorf("reading response: %w", err)
+ }
+
+ var result QueryResponse
+ if err := json.Unmarshal(body, &result); err != nil {
+ return nil, fmt.Errorf("parsing response: %w", err)
+ }
+
+ return &result, nil
+}
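+
+// Each instant-vector sample is encoded as [ <unix timestamp>, "<value>" ], so the value
+// comes back as a string. A sketch of reading it (the job label is illustrative):
+//
+//	resp, err := m.Query(ctx, `sum(reloader_reload_executed_total{job="reloader-new"})`)
+//	if err == nil && len(resp.Data.Result) > 0 {
+//		v, _ := strconv.ParseFloat(resp.Data.Result[0].Value[1].(string), 64)
+//		fmt.Println("reloads executed:", v)
+//	}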
+
+// CollectMetrics collects all metrics for a scenario and writes to output directory.
+func (m *Manager) CollectMetrics(ctx context.Context, job, outputDir, scenario string) error {
+ if err := os.MkdirAll(outputDir, 0755); err != nil {
+ return fmt.Errorf("creating output directory: %w", err)
+ }
+
+ timeRange := "10m"
+
+ // For S6 (restart scenario), use increase() to handle counter resets
+ useIncrease := scenario == "S6"
+
+ // Counter metrics
+ counterMetrics := []string{
+ "reloader_reconcile_total",
+ "reloader_action_total",
+ "reloader_skipped_total",
+ "reloader_errors_total",
+ "reloader_events_received_total",
+ "reloader_workloads_scanned_total",
+ "reloader_workloads_matched_total",
+ "reloader_reload_executed_total",
+ }
+
+ for _, metric := range counterMetrics {
+ var query string
+ if useIncrease {
+ query = fmt.Sprintf(`sum(increase(%s{job="%s"}[%s])) by (success, reason)`, metric, job, timeRange)
+ } else {
+ query = fmt.Sprintf(`sum(%s{job="%s"}) by (success, reason)`, metric, job)
+ }
+
+ if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, metric+".json")); err != nil {
+ fmt.Printf("Warning: failed to collect %s: %v\n", metric, err)
+ }
+ }
+
+ // Histogram percentiles
+ histogramMetrics := []struct {
+ name string
+ prefix string
+ }{
+ {"reloader_reconcile_duration_seconds", "reconcile"},
+ {"reloader_action_latency_seconds", "action"},
+ }
+
+ for _, hm := range histogramMetrics {
+ for _, pct := range []int{50, 95, 99} {
+ quantile := float64(pct) / 100
+ query := fmt.Sprintf(`histogram_quantile(%v, sum(rate(%s_bucket{job="%s"}[%s])) by (le))`,
+ quantile, hm.name, job, timeRange)
+ outFile := filepath.Join(outputDir, fmt.Sprintf("%s_p%d.json", hm.prefix, pct))
+ if err := m.queryAndSave(ctx, query, outFile); err != nil {
+ fmt.Printf("Warning: failed to collect %s p%d: %v\n", hm.name, pct, err)
+ }
+ }
+ }
+
+ // REST client metrics
+ restQueries := map[string]string{
+ "rest_client_requests_total.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s"})`, job),
+ "rest_client_requests_get.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="GET"})`, job),
+ "rest_client_requests_patch.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PATCH"})`, job),
+ "rest_client_requests_put.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PUT"})`, job),
+ "rest_client_requests_errors.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",code=~"[45].."}) or vector(0)`, job),
+ }
+
+ for filename, query := range restQueries {
+ if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
+ fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
+ }
+ }
+
+ // Resource consumption metrics (memory, CPU, goroutines)
+ resourceQueries := map[string]string{
+ // Memory metrics (in bytes)
+ "memory_rss_bytes_avg.json": fmt.Sprintf(`avg_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
+ "memory_rss_bytes_max.json": fmt.Sprintf(`max_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
+ "memory_rss_bytes_cur.json": fmt.Sprintf(`process_resident_memory_bytes{job="%s"}`, job),
+
+ // Heap memory (Go runtime)
+ "memory_heap_bytes_avg.json": fmt.Sprintf(`avg_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),
+ "memory_heap_bytes_max.json": fmt.Sprintf(`max_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),
+
+ // CPU metrics (rate of CPU seconds used)
+ "cpu_usage_cores_avg.json": fmt.Sprintf(`rate(process_cpu_seconds_total{job="%s"}[%s])`, job, timeRange),
+ "cpu_usage_cores_max.json": fmt.Sprintf(`max_over_time(rate(process_cpu_seconds_total{job="%s"}[1m])[%s:1m])`, job, timeRange),
+
+ // Goroutines (concurrency indicator)
+ "goroutines_avg.json": fmt.Sprintf(`avg_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
+ "goroutines_max.json": fmt.Sprintf(`max_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
+ "goroutines_cur.json": fmt.Sprintf(`go_goroutines{job="%s"}`, job),
+
+ // GC metrics (go_gc_duration_seconds is a summary, not a histogram, so there is no
+ // _bucket series to feed histogram_quantile; read the summary's built-in max quantile instead)
+ "gc_duration_seconds_p99.json": fmt.Sprintf(`max(go_gc_duration_seconds{job="%s",quantile="1"})`, job),
+
+ // Threads
+ "threads_cur.json": fmt.Sprintf(`go_threads{job="%s"}`, job),
+ }
+
+ for filename, query := range resourceQueries {
+ if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
+ fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
+ }
+ }
+
+ return nil
+}
+
+func (m *Manager) queryAndSave(ctx context.Context, query, outputPath string) error {
+ result, err := m.Query(ctx, query)
+ if err != nil {
+ // Write empty result on error
+ emptyResult := `{"status":"success","data":{"resultType":"vector","result":[]}}`
+ return os.WriteFile(outputPath, []byte(emptyResult), 0644)
+ }
+
+ data, err := json.MarshalIndent(result, "", " ")
+ if err != nil {
+ return err
+ }
+
+ return os.WriteFile(outputPath, data, 0644)
+}
diff --git a/test/loadtest/internal/scenarios/scenarios.go b/test/loadtest/internal/scenarios/scenarios.go
new file mode 100644
index 0000000..ed48a3b
--- /dev/null
+++ b/test/loadtest/internal/scenarios/scenarios.go
@@ -0,0 +1,2092 @@
+// Package scenarios contains all load test scenario implementations.
+package scenarios
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "log"
+ "math/rand"
+ "os"
+ "path/filepath"
+ "sync"
+ "time"
+
+ "github.com/stakater/Reloader/test/loadtest/internal/reloader"
+
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/intstr"
+ "k8s.io/client-go/kubernetes"
+)
+
+// ExpectedMetrics holds the expected values for metrics verification.
+type ExpectedMetrics struct {
+ ActionTotal int `json:"action_total"`
+ ReloadExecutedTotal int `json:"reload_executed_total"`
+ ReconcileTotal int `json:"reconcile_total"`
+ WorkloadsScannedTotal int `json:"workloads_scanned_total"`
+ WorkloadsMatchedTotal int `json:"workloads_matched_total"`
+ SkippedTotal int `json:"skipped_total"`
+ Description string `json:"description"`
+}
+
+// Runner defines the interface for test scenarios.
+type Runner interface {
+ Name() string
+ Description() string
+ Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error)
+}
+
+// Registry holds all available test scenarios.
+var Registry = map[string]Runner{
+ "S1": &BurstUpdateScenario{},
+ "S2": &FanOutScenario{},
+ "S3": &HighCardinalityScenario{},
+ "S4": &NoOpUpdateScenario{},
+ "S5": &WorkloadChurnScenario{},
+ "S6": &ControllerRestartScenario{},
+ "S7": &APIPressureScenario{},
+ "S8": &LargeObjectScenario{},
+ "S9": &MultiWorkloadTypeScenario{},
+ "S10": &SecretsAndMixedScenario{},
+ "S11": &AnnotationStrategyScenario{},
+ "S12": &PauseResumeScenario{},
+ "S13": &ComplexReferencesScenario{},
+}
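+
+// A sketch of how a runner might drive a scenario (the namespace, clientset, and
+// resultsDir names are illustrative):
+//
+//	sc := Registry["S1"]
+//	expected, err := sc.Run(ctx, clientset, "loadtest-s1", 60*time.Second)
+//	if err == nil {
+//		_ = WriteExpectedMetrics(sc.Name(), resultsDir, expected)
+//	}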
+
+// WriteExpectedMetrics writes expected metrics to a JSON file.
+func WriteExpectedMetrics(scenario, resultsDir string, expected ExpectedMetrics) error {
+ if resultsDir == "" {
+ return nil
+ }
+
+ dir := filepath.Join(resultsDir, scenario)
+ if err := os.MkdirAll(dir, 0755); err != nil {
+ return fmt.Errorf("creating results directory: %w", err)
+ }
+
+ data, err := json.MarshalIndent(expected, "", " ")
+ if err != nil {
+ return fmt.Errorf("marshaling expected metrics: %w", err)
+ }
+
+ path := filepath.Join(dir, "expected.json")
+ if err := os.WriteFile(path, data, 0644); err != nil {
+ return fmt.Errorf("writing expected metrics: %w", err)
+ }
+
+ log.Printf("Expected metrics written to %s", path)
+ return nil
+}
+
+// BurstUpdateScenario - Many ConfigMap/Secret updates in quick succession.
+type BurstUpdateScenario struct{}
+
+func (s *BurstUpdateScenario) Name() string { return "S1" }
+func (s *BurstUpdateScenario) Description() string { return "Burst ConfigMap/Secret updates" }
+
+func (s *BurstUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S1: Creating base ConfigMaps and Deployments...")
+
+ const numConfigMaps = 10
+ const numDeployments = 10
+
+ setupCtx := context.Background()
+
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("burst-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "initial-value"},
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err)
+ }
+ }
+
+ for i := 0; i < numDeployments; i++ {
+ deploy := createDeployment(fmt.Sprintf("burst-deploy-%d", i), namespace, fmt.Sprintf("burst-cm-%d", i))
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Deployment: %v", err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S1: Starting burst updates...")
+
+ updateCount := 0
+ ticker := time.NewTicker(100 * time.Millisecond)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ log.Printf("S1: Context cancelled, completed %d burst updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ WorkloadsMatchedTotal: updateCount,
+ Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount),
+ }, nil
+ case <-ticker.C:
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("burst-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("value-%d-%d", updateCount, time.Now().UnixNano())
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update ConfigMap: %v", err)
+ } else {
+ updateCount++
+ }
+ }
+ }
+
+ log.Printf("S1: Completed %d burst updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ WorkloadsMatchedTotal: updateCount,
+ Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount),
+ }, nil
+}
+
+// FanOutScenario - One ConfigMap used by many workloads.
+type FanOutScenario struct{}
+
+func (s *FanOutScenario) Name() string { return "S2" }
+func (s *FanOutScenario) Description() string { return "Fan-out (one CM -> many workloads)" }
+
+func (s *FanOutScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S2: Creating shared ConfigMap and multiple Deployments...")
+
+ const numDeployments = 50
+ setupCtx := context.Background()
+
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "shared-cm",
+ Namespace: namespace,
+ },
+ Data: map[string]string{"config": "initial"},
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err)
+ }
+
+ for i := 0; i < numDeployments; i++ {
+ deploy := createDeployment(fmt.Sprintf("fanout-deploy-%d", i), namespace, "shared-cm")
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Deployment %d: %v", i, err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S2: Updating shared ConfigMap...")
+
+ // Check context state before starting update loop
+ if ctx.Err() != nil {
+ log.Printf("S2: WARNING - Context already done before update loop: %v", ctx.Err())
+ }
+ if deadline, ok := ctx.Deadline(); ok {
+ remaining := time.Until(deadline)
+ log.Printf("S2: Context deadline in %v", remaining)
+ if remaining < 10*time.Second {
+ log.Printf("S2: WARNING - Very little time remaining on context!")
+ }
+ } else {
+ log.Println("S2: Context has no deadline")
+ }
+
+ updateCount := 0
+ ticker := time.NewTicker(5 * time.Second)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ log.Printf("S2: Will run updates for %v (duration=%v)", duration-5*time.Second, duration)
+
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ expectedActions := updateCount * numDeployments
+ log.Printf("S2: Context done (err=%v), completed %d fan-out updates", ctx.Err(), updateCount)
+ return ExpectedMetrics{
+ ActionTotal: expectedActions,
+ ReloadExecutedTotal: expectedActions,
+ WorkloadsScannedTotal: expectedActions,
+ WorkloadsMatchedTotal: expectedActions,
+ Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions),
+ }, nil
+ case <-ticker.C:
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "shared-cm", metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["config"] = fmt.Sprintf("update-%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update shared ConfigMap: %v", err)
+ } else {
+ updateCount++
+ log.Printf("S2: Updated shared ConfigMap (should trigger %d reloads)", numDeployments)
+ }
+ }
+ }
+
+ expectedActions := updateCount * numDeployments
+ log.Printf("S2: Completed %d fan-out updates, expected %d total actions", updateCount, expectedActions)
+ return ExpectedMetrics{
+ ActionTotal: expectedActions,
+ ReloadExecutedTotal: expectedActions,
+ WorkloadsScannedTotal: expectedActions,
+ WorkloadsMatchedTotal: expectedActions,
+ Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions),
+ }, nil
+}
+
+// HighCardinalityScenario - Many ConfigMaps/Secrets across many namespaces.
+type HighCardinalityScenario struct{}
+
+func (s *HighCardinalityScenario) Name() string { return "S3" }
+func (s *HighCardinalityScenario) Description() string {
+ return "High cardinality (many CMs, many namespaces)"
+}
+
+func (s *HighCardinalityScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S3: Creating high cardinality resources...")
+
+ setupCtx := context.Background()
+
+ namespaces := []string{namespace}
+ for i := 0; i < 10; i++ {
+ ns := fmt.Sprintf("%s-%d", namespace, i)
+ if _, err := client.CoreV1().Namespaces().Create(setupCtx, &corev1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{Name: ns},
+ }, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create namespace %s: %v", ns, err)
+ } else {
+ namespaces = append(namespaces, ns)
+ }
+ }
+
+ for _, ns := range namespaces {
+ for i := 0; i < 20; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("hc-cm-%d", i),
+ Namespace: ns,
+ },
+ Data: map[string]string{"key": "value"},
+ }
+ client.CoreV1().ConfigMaps(ns).Create(setupCtx, cm, metav1.CreateOptions{})
+ deploy := createDeployment(fmt.Sprintf("hc-deploy-%d", i), ns, fmt.Sprintf("hc-cm-%d", i))
+ client.AppsV1().Deployments(ns).Create(setupCtx, deploy, metav1.CreateOptions{})
+ }
+ }
+
+ if err := waitForAllNamespacesReady(setupCtx, client, namespaces, 5*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S3: Starting random updates across namespaces...")
+
+ updateDuration := duration - 5*time.Second
+ if updateDuration < 30*time.Second {
+ updateDuration = 30 * time.Second
+ }
+
+ updateCount := 0
+ ticker := time.NewTicker(200 * time.Millisecond)
+ defer ticker.Stop()
+
+ updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration)
+ defer updateCancel()
+
+ endTime := time.Now().Add(updateDuration)
+ log.Printf("S3: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05"))
+
+ for time.Now().Before(endTime) {
+ select {
+ case <-updateCtx.Done():
+ log.Printf("S3: Completed %d high cardinality updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)),
+ }, nil
+ case <-ticker.C:
+ ns := namespaces[rand.Intn(len(namespaces))]
+ cmIndex := rand.Intn(20)
+ cm, err := client.CoreV1().ConfigMaps(ns).Get(setupCtx, fmt.Sprintf("hc-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("update-%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(ns).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ updateCount++
+ }
+ }
+ }
+
+ log.Printf("S3: Completed %d high cardinality updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)),
+ }, nil
+}
+
+// NoOpUpdateScenario - Updates that don't actually change data.
+type NoOpUpdateScenario struct{}
+
+func (s *NoOpUpdateScenario) Name() string { return "S4" }
+func (s *NoOpUpdateScenario) Description() string { return "No-op updates (same data)" }
+
+func (s *NoOpUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S4: Creating ConfigMaps and Deployments for no-op test...")
+
+ setupCtx := context.Background()
+
+ for i := 0; i < 10; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("noop-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "static-value"},
+ }
+ client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{})
+ deploy := createDeployment(fmt.Sprintf("noop-deploy-%d", i), namespace, fmt.Sprintf("noop-cm-%d", i))
+ client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{})
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S4: Starting no-op updates (annotation changes only)...")
+
+ updateCount := 0
+ ticker := time.NewTicker(100 * time.Millisecond)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ log.Printf("S4: Completed %d no-op updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: 0,
+ ReloadExecutedTotal: 0,
+ SkippedTotal: updateCount,
+ Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount),
+ }, nil
+ case <-ticker.C:
+ cmIndex := rand.Intn(10)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("noop-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ if cm.Annotations == nil {
+ cm.Annotations = make(map[string]string)
+ }
+ cm.Annotations["noop-counter"] = fmt.Sprintf("%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ updateCount++
+ }
+ }
+ }
+
+ log.Printf("S4: Completed %d no-op updates (should see 0 actions)", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: 0,
+ ReloadExecutedTotal: 0,
+ SkippedTotal: updateCount,
+ Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount),
+ }, nil
+}
+
+// WorkloadChurnScenario - Deployments created and deleted rapidly.
+type WorkloadChurnScenario struct{}
+
+func (s *WorkloadChurnScenario) Name() string { return "S5" }
+func (s *WorkloadChurnScenario) Description() string { return "Workload churn (rapid create/delete)" }
+
+func (s *WorkloadChurnScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S5: Creating base ConfigMap...")
+
+ setupCtx := context.Background()
+
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{Name: "churn-cm", Namespace: namespace},
+ Data: map[string]string{"key": "value"},
+ }
+ client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{})
+
+ log.Println("S5: Starting workload churn...")
+
+ var wg sync.WaitGroup
+ var mu sync.Mutex
+ deployCounter := 0
+ deleteCounter := 0
+ cmUpdateCount := 0
+
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ ticker := time.NewTicker(500 * time.Millisecond)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ deployName := fmt.Sprintf("churn-deploy-%d", deployCounter)
+ deploy := createDeployment(deployName, namespace, "churn-cm")
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err == nil {
+ mu.Lock()
+ deployCounter++
+ mu.Unlock()
+ }
+ if deployCounter > 10 {
+ oldName := fmt.Sprintf("churn-deploy-%d", deployCounter-10)
+ if err := client.AppsV1().Deployments(namespace).Delete(setupCtx, oldName, metav1.DeleteOptions{}); err == nil {
+ mu.Lock()
+ deleteCounter++
+ mu.Unlock()
+ }
+ }
+ }
+ }
+ }()
+
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "churn-cm", metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("update-%d", cmUpdateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ mu.Lock()
+ cmUpdateCount++
+ mu.Unlock()
+ }
+ }
+ }
+ }()
+
+ wg.Wait()
+ log.Printf("S5: Created %d, deleted %d deployments, %d CM updates", deployCounter, deleteCounter, cmUpdateCount)
+
+ // S5 does NOT set expected values for action_total/reload_executed_total because:
+ // - There are ~10 active deployments at any time (creates new, deletes old)
+ // - Each CM update triggers reloads on ALL active deployments
+ // - Exact counts depend on timing of creates/deletes vs CM updates
+ // - "Not found" errors are expected when a deployment is deleted during processing
+ // Instead, S5 pass/fail compares old vs new (both should be similar)
+ return ExpectedMetrics{
+ // No expected values - churn makes exact counts unpredictable
+ Description: fmt.Sprintf("S5: Churn test - %d deploys created, %d deleted, %d CM updates, ~10 active deploys at any time", deployCounter, deleteCounter, cmUpdateCount),
+ }, nil
+}
+
+// ControllerRestartScenario - Restart controller under load.
+type ControllerRestartScenario struct {
+ ReloaderVersion string
+}
+
+func (s *ControllerRestartScenario) Name() string { return "S6" }
+func (s *ControllerRestartScenario) Description() string {
+ return "Controller restart under load"
+}
+
+func (s *ControllerRestartScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S6: Creating resources and generating load...")
+
+ setupCtx := context.Background()
+
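+ // Seed 20 ConfigMap+Deployment pairs that will be receiving reloads while the
+ // controller is restarted partway through the run.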
+ for i := 0; i < 20; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("restart-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "initial"},
+ }
+ client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{})
+ deploy := createDeployment(fmt.Sprintf("restart-deploy-%d", i), namespace, fmt.Sprintf("restart-cm-%d", i))
+ client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{})
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ var wg sync.WaitGroup
+ var mu sync.Mutex
+ updateCount := 0
+
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ ticker := time.NewTicker(200 * time.Millisecond)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ cmIndex := rand.Intn(20)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("restart-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("update-%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ mu.Lock()
+ updateCount++
+ mu.Unlock()
+ }
+ }
+ }
+ }()
+
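+ // The Reloader instance under test lives in the reloader-<version> namespace;
+ // fall back to reloader-new when no version is set.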
+ reloaderNS := fmt.Sprintf("reloader-%s", s.ReloaderVersion)
+ if s.ReloaderVersion == "" {
+ reloaderNS = "reloader-new"
+ }
+
+ log.Println("S6: Waiting 20 seconds before restarting controller...")
+ time.Sleep(20 * time.Second)
+
+ log.Println("S6: Restarting Reloader pod...")
+ pods, err := client.CoreV1().Pods(reloaderNS).List(setupCtx, metav1.ListOptions{
+ LabelSelector: "app=reloader",
+ })
+ if err == nil && len(pods.Items) > 0 {
+ client.CoreV1().Pods(reloaderNS).Delete(setupCtx, pods.Items[0].Name, metav1.DeleteOptions{})
+ }
+
+ wg.Wait()
+ log.Printf("S6: Controller restart scenario completed with %d updates", updateCount)
+ return ExpectedMetrics{
+ Description: fmt.Sprintf("S6: Restart test - %d updates during restart", updateCount),
+ }, nil
+}
+
+// APIPressureScenario - Simulate API server pressure with many concurrent requests.
+type APIPressureScenario struct{}
+
+func (s *APIPressureScenario) Name() string { return "S7" }
+func (s *APIPressureScenario) Description() string { return "API pressure (many concurrent requests)" }
+
+func (s *APIPressureScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S7: Creating resources for API pressure test...")
+
+ const numConfigMaps = 50
+ setupCtx := context.Background()
+
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("api-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "value"},
+ }
+ client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{})
+ deploy := createDeployment(fmt.Sprintf("api-deploy-%d", i), namespace, fmt.Sprintf("api-cm-%d", i))
+ client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{})
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S7: Starting concurrent updates from multiple goroutines...")
+
+ updateDuration := duration - 5*time.Second
+ if updateDuration < 30*time.Second {
+ updateDuration = 30 * time.Second
+ }
+
+ updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration)
+ defer updateCancel()
+
+ endTime := time.Now().Add(updateDuration)
+ log.Printf("S7: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05"))
+
+ var wg sync.WaitGroup
+ var mu sync.Mutex
+ totalUpdates := 0
+
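+ // Spawn 10 updater goroutines, each hitting a random ConfigMap every 100ms,
+ // to generate sustained concurrent API pressure.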
+ for g := 0; g < 10; g++ {
+ wg.Add(1)
+ go func(goroutineID int) {
+ defer wg.Done()
+ ticker := time.NewTicker(100 * time.Millisecond)
+ defer ticker.Stop()
+
+ updateCount := 0
+ // Flush this goroutine's count into the shared total even if the update
+ // context is cancelled before the deadline, so no completed updates are lost.
+ defer func() {
+ mu.Lock()
+ totalUpdates += updateCount
+ mu.Unlock()
+ log.Printf("S7: Goroutine %d completed %d updates", goroutineID, updateCount)
+ }()
+ for time.Now().Before(endTime) {
+ select {
+ case <-updateCtx.Done():
+ return
+ case <-ticker.C:
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("api-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("g%d-update-%d", goroutineID, updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ updateCount++
+ }
+ }
+ }
+ }(g)
+ }
+
+ wg.Wait()
+ log.Printf("S7: API pressure scenario completed with %d total updates", totalUpdates)
+ return ExpectedMetrics{
+ ActionTotal: totalUpdates,
+ ReloadExecutedTotal: totalUpdates,
+ Description: fmt.Sprintf("S7: %d concurrent updates from 10 goroutines", totalUpdates),
+ }, nil
+}
+
+// LargeObjectScenario - Large ConfigMaps (>100KB values).
+type LargeObjectScenario struct{}
+
+func (s *LargeObjectScenario) Name() string { return "S8" }
+func (s *LargeObjectScenario) Description() string { return "Large ConfigMaps (>100KB)" }
+
+func (s *LargeObjectScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S8: Creating large ConfigMaps...")
+
+ setupCtx := context.Background()
+
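+ // Build a ~100KiB payload of repeating a-z characters; each ConfigMap carries
+ // two copies of it, so every object is roughly 200KiB.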
+ largeData := make([]byte, 100*1024)
+ for i := range largeData {
+ largeData[i] = byte('a' + (i % 26))
+ }
+ largeValue := string(largeData)
+
+ for i := 0; i < 10; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("large-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{
+ "large-key-1": largeValue,
+ "large-key-2": largeValue,
+ },
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create large ConfigMap %d: %v", i, err)
+ }
+ deploy := createDeployment(fmt.Sprintf("large-deploy-%d", i), namespace, fmt.Sprintf("large-cm-%d", i))
+ client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{})
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S8: Starting large object updates...")
+
+ updateCount := 0
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ log.Printf("S8: Completed %d large object updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount),
+ }, nil
+ case <-ticker.C:
+ cmIndex := rand.Intn(10)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("large-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["large-key-1"] = largeValue[:len(largeValue)-10] + fmt.Sprintf("-%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update large ConfigMap: %v", err)
+ } else {
+ updateCount++
+ }
+ }
+ }
+
+ log.Printf("S8: Completed %d large object updates", updateCount)
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount),
+ }, nil
+}
+
+// Helper functions
+
+func waitForDeploymentsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error {
+ log.Printf("Waiting for all deployments in %s to be ready (timeout: %v)...", namespace, timeout)
+
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ deployments, err := client.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{})
+ if err != nil {
+ return fmt.Errorf("failed to list deployments: %w", err)
+ }
+
+ allReady := true
+ notReady := 0
+ for _, d := range deployments.Items {
+ if d.Status.ReadyReplicas < *d.Spec.Replicas {
+ allReady = false
+ notReady++
+ }
+ }
+
+ if allReady && len(deployments.Items) > 0 {
+ log.Printf("All %d deployments in %s are ready", len(deployments.Items), namespace)
+ return nil
+ }
+
+ log.Printf("Waiting for deployments: %d/%d not ready yet...", notReady, len(deployments.Items))
+ time.Sleep(5 * time.Second)
+ }
+
+ return fmt.Errorf("timeout waiting for deployments to be ready")
+}
+
+func waitForAllNamespacesReady(ctx context.Context, client kubernetes.Interface, namespaces []string, timeout time.Duration) error {
+ log.Printf("Waiting for deployments in %d namespaces to be ready...", len(namespaces))
+
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ allReady := true
+ totalDeploys := 0
+ notReady := 0
+
+ for _, ns := range namespaces {
+ deployments, err := client.AppsV1().Deployments(ns).List(ctx, metav1.ListOptions{})
+ if err != nil {
+ continue
+ }
+ for _, d := range deployments.Items {
+ totalDeploys++
+ if d.Status.ReadyReplicas < *d.Spec.Replicas {
+ allReady = false
+ notReady++
+ }
+ }
+ }
+
+ if allReady && totalDeploys > 0 {
+ log.Printf("All %d deployments across %d namespaces are ready", totalDeploys, len(namespaces))
+ return nil
+ }
+
+ log.Printf("Waiting: %d/%d deployments not ready yet...", notReady, totalDeploys)
+ time.Sleep(5 * time.Second)
+ }
+
+ return fmt.Errorf("timeout waiting for deployments to be ready")
+}
+
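+// createDeployment returns a single-replica busybox Deployment annotated with
+// reloader.stakater.com/auto that consumes the named ConfigMap via envFrom.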
+func createDeployment(name, namespace, configMapName string) *appsv1.Deployment {
+ replicas := int32(1)
+ maxSurge := intstr.FromInt(1)
+ maxUnavailable := intstr.FromInt(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.Deployment{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.DeploymentSpec{
+ Replicas: &replicas,
+ Strategy: appsv1.DeploymentStrategy{
+ Type: appsv1.RollingUpdateDeploymentStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDeployment{
+ MaxSurge: &maxSurge,
+ MaxUnavailable: &maxUnavailable,
+ },
+ },
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: configMapName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
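+// createDeploymentWithSecret is like createDeployment but consumes the named
+// Secret via envFrom instead of a ConfigMap.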
+func createDeploymentWithSecret(name, namespace, secretName string) *appsv1.Deployment {
+ replicas := int32(1)
+ maxSurge := intstr.FromInt(1)
+ maxUnavailable := intstr.FromInt(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.Deployment{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.DeploymentSpec{
+ Replicas: &replicas,
+ Strategy: appsv1.DeploymentStrategy{
+ Type: appsv1.RollingUpdateDeploymentStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDeployment{
+ MaxSurge: &maxSurge,
+ MaxUnavailable: &maxUnavailable,
+ },
+ },
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ SecretRef: &corev1.SecretEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: secretName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
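+// createDeploymentWithBoth consumes both the named ConfigMap and the named
+// Secret via envFrom, so updates to either resource trigger a reload.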
+func createDeploymentWithBoth(name, namespace, configMapName, secretName string) *appsv1.Deployment {
+ replicas := int32(1)
+ maxSurge := intstr.FromInt(1)
+ maxUnavailable := intstr.FromInt(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.Deployment{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.DeploymentSpec{
+ Replicas: &replicas,
+ Strategy: appsv1.DeploymentStrategy{
+ Type: appsv1.RollingUpdateDeploymentStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDeployment{
+ MaxSurge: &maxSurge,
+ MaxUnavailable: &maxUnavailable,
+ },
+ },
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: configMapName,
+ },
+ },
+ },
+ {
+ SecretRef: &corev1.SecretEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: secretName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
+// SecretsAndMixedScenario - Tests Secrets and mixed ConfigMap+Secret workloads.
+type SecretsAndMixedScenario struct{}
+
+func (s *SecretsAndMixedScenario) Name() string { return "S10" }
+func (s *SecretsAndMixedScenario) Description() string {
+ return "Secrets and mixed ConfigMap+Secret workloads"
+}
+
+func (s *SecretsAndMixedScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S10: Creating Secrets, ConfigMaps, and mixed workloads...")
+
+ const numSecrets = 5
+ const numConfigMaps = 5
+ const numSecretOnlyDeploys = 5
+ const numConfigMapOnlyDeploys = 3
+ const numMixedDeploys = 2
+
+ setupCtx := context.Background()
+
+ // Create Secrets
+ for i := 0; i < numSecrets; i++ {
+ secret := &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("mixed-secret-%d", i),
+ Namespace: namespace,
+ },
+ StringData: map[string]string{
+ "password": fmt.Sprintf("initial-secret-%d", i),
+ },
+ }
+ if _, err := client.CoreV1().Secrets(namespace).Create(setupCtx, secret, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Secret %s: %v", secret.Name, err)
+ }
+ }
+
+ // Create ConfigMaps
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("mixed-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{
+ "config": fmt.Sprintf("initial-config-%d", i),
+ },
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err)
+ }
+ }
+
+ // Create Secret-only deployments
+ for i := 0; i < numSecretOnlyDeploys; i++ {
+ deploy := createDeploymentWithSecret(
+ fmt.Sprintf("secret-only-deploy-%d", i),
+ namespace,
+ fmt.Sprintf("mixed-secret-%d", i%numSecrets),
+ )
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Secret-only Deployment: %v", err)
+ }
+ }
+
+ // Create ConfigMap-only deployments
+ for i := 0; i < numConfigMapOnlyDeploys; i++ {
+ deploy := createDeployment(
+ fmt.Sprintf("cm-only-deploy-%d", i),
+ namespace,
+ fmt.Sprintf("mixed-cm-%d", i%numConfigMaps),
+ )
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap-only Deployment: %v", err)
+ }
+ }
+
+ // Create mixed deployments (using both Secret and ConfigMap)
+ for i := 0; i < numMixedDeploys; i++ {
+ deploy := createDeploymentWithBoth(
+ fmt.Sprintf("mixed-deploy-%d", i),
+ namespace,
+ fmt.Sprintf("mixed-cm-%d", i%numConfigMaps),
+ fmt.Sprintf("mixed-secret-%d", i%numSecrets),
+ )
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create mixed Deployment: %v", err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S10: Starting alternating Secret and ConfigMap updates...")
+
+ secretUpdateCount := 0
+ cmUpdateCount := 0
+ ticker := time.NewTicker(500 * time.Millisecond)
+ defer ticker.Stop()
+
+ updateSecret := true // Alternate between Secret and ConfigMap updates
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil
+ case <-ticker.C:
+ if updateSecret {
+ // Update a random Secret
+ secretIndex := rand.Intn(numSecrets)
+ secret, err := client.CoreV1().Secrets(namespace).Get(setupCtx, fmt.Sprintf("mixed-secret-%d", secretIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ secret.StringData = map[string]string{
+ "password": fmt.Sprintf("updated-secret-%d-%d", secretIndex, secretUpdateCount),
+ }
+ if _, err := client.CoreV1().Secrets(namespace).Update(setupCtx, secret, metav1.UpdateOptions{}); err == nil {
+ secretUpdateCount++
+ }
+ } else {
+ // Update a random ConfigMap
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("mixed-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, cmUpdateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+ cmUpdateCount++
+ }
+ }
+ updateSecret = !updateSecret
+ }
+ }
+
+ log.Printf("S10: Completed %d Secret updates and %d ConfigMap updates", secretUpdateCount, cmUpdateCount)
+ return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil
+}
+
+func (s *SecretsAndMixedScenario) calculateExpected(secretUpdates, cmUpdates, secretOnlyDeploys, cmOnlyDeploys, mixedDeploys int) ExpectedMetrics {
+ // Secret updates trigger: secret-only deploys + mixed deploys
+ secretTriggeredReloads := secretUpdates * (secretOnlyDeploys + mixedDeploys)
+ // ConfigMap updates trigger: cm-only deploys + mixed deploys
+ cmTriggeredReloads := cmUpdates * (cmOnlyDeploys + mixedDeploys)
+ totalExpectedReloads := secretTriggeredReloads + cmTriggeredReloads
+
+ return ExpectedMetrics{
+ ActionTotal: totalExpectedReloads,
+ ReloadExecutedTotal: totalExpectedReloads,
+ Description: fmt.Sprintf("S10: %d Secret updates (→%d reloads) + %d CM updates (→%d reloads) = %d total",
+ secretUpdates, secretTriggeredReloads, cmUpdates, cmTriggeredReloads, totalExpectedReloads),
+ }
+}
+
+// MultiWorkloadTypeScenario - Tests multiple workload types (Deployment, StatefulSet, DaemonSet) sharing one ConfigMap.
+type MultiWorkloadTypeScenario struct{}
+
+func (s *MultiWorkloadTypeScenario) Name() string { return "S9" }
+func (s *MultiWorkloadTypeScenario) Description() string {
+ return "Multi-workload types (Deploy, StatefulSet, DaemonSet, Job, CronJob)"
+}
+
+func (s *MultiWorkloadTypeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S9: Creating shared ConfigMap and multiple workload types...")
+
+ const numDeployments = 5
+ const numStatefulSets = 3
+ const numDaemonSets = 2
+
+ setupCtx := context.Background()
+
+ // Create shared ConfigMap
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "multi-type-cm",
+ Namespace: namespace,
+ },
+ Data: map[string]string{"config": "initial"},
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err)
+ }
+
+ // Create Deployments
+ for i := 0; i < numDeployments; i++ {
+ deploy := createDeployment(fmt.Sprintf("multi-deploy-%d", i), namespace, "multi-type-cm")
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Deployment %d: %v", i, err)
+ }
+ }
+
+ // Create StatefulSets
+ for i := 0; i < numStatefulSets; i++ {
+ sts := createStatefulSet(fmt.Sprintf("multi-sts-%d", i), namespace, "multi-type-cm")
+ if _, err := client.AppsV1().StatefulSets(namespace).Create(setupCtx, sts, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create StatefulSet %d: %v", i, err)
+ }
+ }
+
+ // Create DaemonSets
+ for i := 0; i < numDaemonSets; i++ {
+ ds := createDaemonSet(fmt.Sprintf("multi-ds-%d", i), namespace, "multi-type-cm")
+ if _, err := client.AppsV1().DaemonSets(namespace).Create(setupCtx, ds, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create DaemonSet %d: %v", i, err)
+ }
+ }
+
+ // Wait for workloads to be ready
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+ if err := waitForStatefulSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+ if err := waitForDaemonSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S9: Starting ConfigMap updates to trigger reloads on all workload types...")
+
+ updateCount := 0
+ ticker := time.NewTicker(5 * time.Second)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil
+ case <-ticker.C:
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "multi-type-cm", metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["config"] = fmt.Sprintf("update-%d", updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update shared ConfigMap: %v", err)
+ } else {
+ updateCount++
+ log.Printf("S9: Updated shared ConfigMap (update #%d)", updateCount)
+ }
+ }
+ }
+
+ log.Printf("S9: Completed %d ConfigMap updates", updateCount)
+ return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil
+}
+
+func (s *MultiWorkloadTypeScenario) calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets int) ExpectedMetrics {
+ // Each CM update triggers reload on all workloads
+ totalWorkloads := numDeployments + numStatefulSets + numDaemonSets
+ expectedReloads := updateCount * totalWorkloads
+
+ return ExpectedMetrics{
+ ActionTotal: expectedReloads,
+ ReloadExecutedTotal: expectedReloads,
+ WorkloadsMatchedTotal: expectedReloads,
+ Description: fmt.Sprintf("S9: %d CM updates × %d workloads (%d Deploys + %d STS + %d DS) = %d reloads",
+ updateCount, totalWorkloads, numDeployments, numStatefulSets, numDaemonSets, expectedReloads),
+ }
+}
+
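+// createStatefulSet returns a single-replica busybox StatefulSet annotated for
+// auto-reload that consumes the named ConfigMap via envFrom.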
+func createStatefulSet(name, namespace, configMapName string) *appsv1.StatefulSet {
+ replicas := int32(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: &replicas,
+ ServiceName: name,
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: configMapName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
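+// createDaemonSet returns a busybox DaemonSet annotated for auto-reload that
+// consumes the named ConfigMap via envFrom and tolerates control-plane taints
+// so it schedules on every node.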
+func createDaemonSet(name, namespace, configMapName string) *appsv1.DaemonSet {
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.DaemonSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.DaemonSetSpec{
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ // Use tolerations to run on all nodes including control-plane
+ Tolerations: []corev1.Toleration{
+ {
+ Key: "node-role.kubernetes.io/control-plane",
+ Operator: corev1.TolerationOpExists,
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ {
+ Key: "node-role.kubernetes.io/master",
+ Operator: corev1.TolerationOpExists,
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: configMapName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
+func waitForStatefulSetsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error {
+ log.Printf("Waiting for all StatefulSets in %s to be ready (timeout: %v)...", namespace, timeout)
+
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ stsList, err := client.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{})
+ if err != nil {
+ return fmt.Errorf("failed to list StatefulSets: %w", err)
+ }
+
+ if len(stsList.Items) == 0 {
+ log.Printf("No StatefulSets found in %s", namespace)
+ return nil
+ }
+
+ allReady := true
+ notReady := 0
+ for _, sts := range stsList.Items {
+ if sts.Status.ReadyReplicas < *sts.Spec.Replicas {
+ allReady = false
+ notReady++
+ }
+ }
+
+ if allReady {
+ log.Printf("All %d StatefulSets in %s are ready", len(stsList.Items), namespace)
+ return nil
+ }
+
+ log.Printf("Waiting for StatefulSets: %d/%d not ready yet...", notReady, len(stsList.Items))
+ time.Sleep(5 * time.Second)
+ }
+
+ return fmt.Errorf("timeout waiting for StatefulSets to be ready")
+}
+
+func waitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error {
+ log.Printf("Waiting for all DaemonSets in %s to be ready (timeout: %v)...", namespace, timeout)
+
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{})
+ if err != nil {
+ return fmt.Errorf("failed to list DaemonSets: %w", err)
+ }
+
+ if len(dsList.Items) == 0 {
+ log.Printf("No DaemonSets found in %s", namespace)
+ return nil
+ }
+
+ allReady := true
+ notReady := 0
+ for _, ds := range dsList.Items {
+ if ds.Status.NumberReady < ds.Status.DesiredNumberScheduled {
+ allReady = false
+ notReady++
+ }
+ }
+
+ if allReady {
+ log.Printf("All %d DaemonSets in %s are ready", len(dsList.Items), namespace)
+ return nil
+ }
+
+ log.Printf("Waiting for DaemonSets: %d/%d not ready yet...", notReady, len(dsList.Items))
+ time.Sleep(5 * time.Second)
+ }
+
+ return fmt.Errorf("timeout waiting for DaemonSets to be ready")
+}
+
+// ComplexReferencesScenario - Tests init containers, valueFrom, and projected volumes.
+type ComplexReferencesScenario struct{}
+
+func (s *ComplexReferencesScenario) Name() string { return "S13" }
+func (s *ComplexReferencesScenario) Description() string {
+ return "Complex references (init containers, valueFrom, projected volumes)"
+}
+
+func (s *ComplexReferencesScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S13: Creating ConfigMaps and complex deployments with various reference types...")
+
+ const numConfigMaps = 5
+ const numDeployments = 5
+
+ setupCtx := context.Background()
+
+ // Create ConfigMaps with multiple keys
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("complex-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{
+ "key1": fmt.Sprintf("value1-%d", i),
+ "key2": fmt.Sprintf("value2-%d", i),
+ "config": fmt.Sprintf("config-%d", i),
+ },
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err)
+ }
+ }
+
+ // Create complex deployments with various reference types
+ for i := 0; i < numDeployments; i++ {
+ // Each deployment references multiple ConfigMaps in different ways
+ primaryCM := fmt.Sprintf("complex-cm-%d", i)
+ secondaryCM := fmt.Sprintf("complex-cm-%d", (i+1)%numConfigMaps)
+
+ deploy := createComplexDeployment(
+ fmt.Sprintf("complex-deploy-%d", i),
+ namespace,
+ primaryCM,
+ secondaryCM,
+ )
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create complex Deployment: %v", err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S13: Starting ConfigMap updates to test all reference types...")
+
+ updateCount := 0
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil
+ case <-ticker.C:
+ // Update a random ConfigMap
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("complex-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key1"] = fmt.Sprintf("updated-value1-%d-%d", cmIndex, updateCount)
+ cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update ConfigMap: %v", err)
+ } else {
+ updateCount++
+ log.Printf("S13: Updated complex-cm-%d (update #%d)", cmIndex, updateCount)
+ }
+ }
+ }
+
+ log.Printf("S13: Completed %d ConfigMap updates", updateCount)
+ return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil
+}
+
+func (s *ComplexReferencesScenario) calculateExpected(updateCount, numConfigMaps, numDeployments int) ExpectedMetrics {
+ // Each ConfigMap is referenced by:
+ // - 1 deployment as primary (envFrom in init + valueFrom in main + volume mount)
+ // - 1 deployment as secondary (projected volume)
+ // So each CM update triggers reloads on exactly those 2 deployments.
+ expectedReloadsPerUpdate := 2
+ expectedReloads := updateCount * expectedReloadsPerUpdate
+
+ return ExpectedMetrics{
+ ActionTotal: expectedReloads,
+ ReloadExecutedTotal: expectedReloads,
+ Description: fmt.Sprintf("S13: %d CM updates × ~%d affected deploys = ~%d reloads (init containers, valueFrom, volumes, projected)",
+ updateCount, expectedReloadsPerUpdate, expectedReloads),
+ }
+}
+
+// PauseResumeScenario - Tests pause-period functionality under rapid updates.
+type PauseResumeScenario struct{}
+
+func (s *PauseResumeScenario) Name() string { return "S12" }
+func (s *PauseResumeScenario) Description() string {
+ return "Pause & Resume (rapid updates with pause-period)"
+}
+
+func (s *PauseResumeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ log.Println("S12: Creating ConfigMaps and Deployments with pause-period annotation...")
+
+ const numConfigMaps = 10
+ const numDeployments = 10
+ const pausePeriod = 15 * time.Second
+ const updateInterval = 2 * time.Second
+
+ setupCtx := context.Background()
+
+ // Create ConfigMaps
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("pause-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "initial-value"},
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err)
+ }
+ }
+
+ // Create Deployments with pause-period annotation
+ for i := 0; i < numDeployments; i++ {
+ deploy := createDeploymentWithPause(
+ fmt.Sprintf("pause-deploy-%d", i),
+ namespace,
+ fmt.Sprintf("pause-cm-%d", i),
+ pausePeriod,
+ )
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Deployment: %v", err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Printf("S12: Starting rapid ConfigMap updates (every %v) with %v pause-period...", updateInterval, pausePeriod)
+
+ updateCount := 0
+ ticker := time.NewTicker(updateInterval)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 5*time.Second)
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil
+ case <-ticker.C:
+ // Update a random ConfigMap
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("pause-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update ConfigMap: %v", err)
+ } else {
+ updateCount++
+ }
+ }
+ }
+
+ log.Printf("S12: Completed %d rapid updates (pause-period should reduce actual reloads)", updateCount)
+ return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil
+}
+
+func (s *PauseResumeScenario) calculateExpected(updateCount int, duration, updateInterval, pausePeriod time.Duration) ExpectedMetrics {
+ // With pause-period we expect far fewer reloads than updates: updates land on
+ // random ConfigMaps (~updateCount/numConfigMaps each), and the pause-period
+ // suppresses rapid consecutive reloads of the same deployment. With a 15s pause
+ // and a 2s update interval that works out to roughly one reload per pause
+ // period per deployment, i.e. expected reloads ≈ (duration / pausePeriod) * numDeployments.
+
+ // This is an approximation - the actual value depends on random distribution
+ expectedCycles := int(duration / pausePeriod)
+ if expectedCycles < 1 {
+ expectedCycles = 1
+ }
+
+ return ExpectedMetrics{
+ // Don't set exact expected values since pause-period makes counts unpredictable
+ // The scenario validates that reloads << updates due to pause behavior
+ Description: fmt.Sprintf("S12: %d updates with %v pause-period (expect ~%d reload cycles, actual reloads << updates)",
+ updateCount, pausePeriod, expectedCycles),
+ }
+}
+
+// AnnotationStrategyScenario - Tests annotation-based reload strategy.
+// This scenario deploys its own Reloader instance with --reload-strategy=annotations.
+type AnnotationStrategyScenario struct {
+ // Image is the Reloader image to use. Must be set before running.
+ Image string
+}
+
+func (s *AnnotationStrategyScenario) Name() string { return "S11" }
+func (s *AnnotationStrategyScenario) Description() string {
+ return "Annotation reload strategy (--reload-strategy=annotations)"
+}
+
+func (s *AnnotationStrategyScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+ if s.Image == "" {
+ return ExpectedMetrics{}, fmt.Errorf("S11 requires Image to be set (use the same image as --new-image)")
+ }
+
+ log.Println("S11: Deploying Reloader with --reload-strategy=annotations...")
+
+ // Deploy S11's own Reloader instance
+ reloaderNS := "reloader-s11"
+ mgr := reloader.NewManager(reloader.Config{
+ Version: "s11",
+ Image: s.Image,
+ Namespace: reloaderNS,
+ ReloadStrategy: "annotations",
+ })
+
+ if err := mgr.Deploy(ctx); err != nil {
+ return ExpectedMetrics{}, fmt.Errorf("deploying S11 reloader: %w", err)
+ }
+
+ // Ensure cleanup on exit
+ defer func() {
+ log.Println("S11: Cleaning up S11-specific Reloader...")
+ cleanupCtx := context.Background()
+ if err := mgr.Cleanup(cleanupCtx); err != nil {
+ log.Printf("Warning: failed to cleanup S11 reloader: %v", err)
+ }
+ }()
+
+ log.Println("S11: Creating ConfigMaps and Deployments...")
+
+ const numConfigMaps = 10
+ const numDeployments = 10
+
+ setupCtx := context.Background()
+
+ // Create ConfigMaps
+ for i := 0; i < numConfigMaps; i++ {
+ cm := &corev1.ConfigMap{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: fmt.Sprintf("annot-cm-%d", i),
+ Namespace: namespace,
+ },
+ Data: map[string]string{"key": "initial-value"},
+ }
+ if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err)
+ }
+ }
+
+ // Create Deployments
+ for i := 0; i < numDeployments; i++ {
+ deploy := createDeployment(fmt.Sprintf("annot-deploy-%d", i), namespace, fmt.Sprintf("annot-cm-%d", i))
+ if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+ log.Printf("Failed to create Deployment: %v", err)
+ }
+ }
+
+ if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+ log.Printf("Warning: %v - continuing anyway", err)
+ }
+
+ log.Println("S11: Starting ConfigMap updates with annotation strategy...")
+
+ updateCount := 0
+ annotationUpdatesSeen := 0
+ ticker := time.NewTicker(500 * time.Millisecond)
+ defer ticker.Stop()
+
+ endTime := time.Now().Add(duration - 10*time.Second) // Extra time for cleanup
+ for time.Now().Before(endTime) {
+ select {
+ case <-ctx.Done():
+ return s.calculateExpected(updateCount, annotationUpdatesSeen), nil
+ case <-ticker.C:
+ // Update a random ConfigMap
+ cmIndex := rand.Intn(numConfigMaps)
+ cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("annot-cm-%d", cmIndex), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount)
+ if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+ log.Printf("Failed to update ConfigMap: %v", err)
+ } else {
+ updateCount++
+ }
+
+ // Periodically check for annotation updates on deployments
+ if updateCount%10 == 0 {
+ deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", cmIndex), metav1.GetOptions{})
+ if err == nil {
+ if _, hasAnnotation := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; hasAnnotation {
+ annotationUpdatesSeen++
+ }
+ }
+ }
+ }
+ }
+
+ // Final check: verify annotation strategy is working
+ log.Println("S11: Verifying annotation-based reload...")
+ time.Sleep(5 * time.Second) // Allow time for final updates to propagate
+
+ deploysWithAnnotation := 0
+ for i := 0; i < numDeployments; i++ {
+ deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", i), metav1.GetOptions{})
+ if err != nil {
+ continue
+ }
+ if deploy.Spec.Template.Annotations != nil {
+ if _, ok := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; ok {
+ deploysWithAnnotation++
+ }
+ }
+ }
+
+ log.Printf("S11: Completed %d updates, %d deployments have reload annotation", updateCount, deploysWithAnnotation)
+ return s.calculateExpected(updateCount, deploysWithAnnotation), nil
+}
+
+func (s *AnnotationStrategyScenario) calculateExpected(updateCount, deploysWithAnnotation int) ExpectedMetrics {
+ return ExpectedMetrics{
+ ActionTotal: updateCount,
+ ReloadExecutedTotal: updateCount,
+ Description: fmt.Sprintf("S11: %d updates with annotation strategy, %d deployments received annotation",
+ updateCount, deploysWithAnnotation),
+ }
+}
+
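+// createDeploymentWithPause is like createDeployment but also sets the
+// deployment.reloader.stakater.com/pause-period annotation, which S12 relies on
+// to damp rapid consecutive reloads.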
+func createDeploymentWithPause(name, namespace, configMapName string, pausePeriod time.Duration) *appsv1.Deployment {
+ replicas := int32(1)
+ maxSurge := intstr.FromInt(1)
+ maxUnavailable := intstr.FromInt(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.Deployment{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ // Deployment-specific pause-period annotation
+ "deployment.reloader.stakater.com/pause-period": fmt.Sprintf("%ds", int(pausePeriod.Seconds())),
+ },
+ },
+ Spec: appsv1.DeploymentSpec{
+ Replicas: &replicas,
+ Strategy: appsv1.DeploymentStrategy{
+ Type: appsv1.RollingUpdateDeploymentStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDeployment{
+ MaxSurge: &maxSurge,
+ MaxUnavailable: &maxUnavailable,
+ },
+ },
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ Containers: []corev1.Container{
+ {
+ Name: "app",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: configMapName,
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
+
+// createComplexDeployment creates a deployment with multiple ConfigMap reference types.
+// - Init container using envFrom
+// - Main container using env.valueFrom.configMapKeyRef
+// - Sidecar container using volume mount
+// - Projected volume combining multiple ConfigMaps
+func createComplexDeployment(name, namespace, primaryCM, secondaryCM string) *appsv1.Deployment {
+ replicas := int32(1)
+ maxSurge := intstr.FromInt(1)
+ maxUnavailable := intstr.FromInt(1)
+ terminationGracePeriod := int64(0)
+
+ return &appsv1.Deployment{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ Annotations: map[string]string{
+ "reloader.stakater.com/auto": "true",
+ },
+ },
+ Spec: appsv1.DeploymentSpec{
+ Replicas: &replicas,
+ Strategy: appsv1.DeploymentStrategy{
+ Type: appsv1.RollingUpdateDeploymentStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDeployment{
+ MaxSurge: &maxSurge,
+ MaxUnavailable: &maxUnavailable,
+ },
+ },
+ Selector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{"app": name},
+ },
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{"app": name},
+ },
+ Spec: corev1.PodSpec{
+ TerminationGracePeriodSeconds: &terminationGracePeriod,
+ // Init container using envFrom
+ InitContainers: []corev1.Container{
+ {
+ Name: "init",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "echo Init done"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ EnvFrom: []corev1.EnvFromSource{
+ {
+ ConfigMapRef: &corev1.ConfigMapEnvSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: primaryCM,
+ },
+ },
+ },
+ },
+ },
+ },
+ Containers: []corev1.Container{
+ // Main container using valueFrom (individual keys)
+ {
+ Name: "main",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ Env: []corev1.EnvVar{
+ {
+ Name: "CONFIG_KEY1",
+ ValueFrom: &corev1.EnvVarSource{
+ ConfigMapKeyRef: &corev1.ConfigMapKeySelector{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: primaryCM,
+ },
+ Key: "key1",
+ },
+ },
+ },
+ {
+ Name: "CONFIG_KEY2",
+ ValueFrom: &corev1.EnvVarSource{
+ ConfigMapKeyRef: &corev1.ConfigMapKeySelector{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: primaryCM,
+ },
+ Key: "key2",
+ },
+ },
+ },
+ },
+ },
+ // Sidecar using volume mount
+ {
+ Name: "sidecar",
+ Image: "gcr.io/google-containers/busybox:1.27",
+ Command: []string{"sh", "-c", "sleep 999999999"},
+ Resources: corev1.ResourceRequirements{
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1m"),
+ corev1.ResourceMemory: resource.MustParse("4Mi"),
+ },
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("10m"),
+ corev1.ResourceMemory: resource.MustParse("16Mi"),
+ },
+ },
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "config-volume",
+ MountPath: "/etc/config",
+ },
+ {
+ Name: "projected-volume",
+ MountPath: "/etc/projected",
+ },
+ },
+ },
+ },
+ Volumes: []corev1.Volume{
+ // Regular ConfigMap volume
+ {
+ Name: "config-volume",
+ VolumeSource: corev1.VolumeSource{
+ ConfigMap: &corev1.ConfigMapVolumeSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: primaryCM,
+ },
+ },
+ },
+ },
+ // Projected volume combining multiple ConfigMaps
+ {
+ Name: "projected-volume",
+ VolumeSource: corev1.VolumeSource{
+ Projected: &corev1.ProjectedVolumeSource{
+ Sources: []corev1.VolumeProjection{
+ {
+ ConfigMap: &corev1.ConfigMapProjection{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: primaryCM,
+ },
+ Items: []corev1.KeyToPath{
+ {
+ Key: "key1",
+ Path: "primary-key1",
+ },
+ },
+ },
+ },
+ {
+ ConfigMap: &corev1.ConfigMapProjection{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: secondaryCM,
+ },
+ Items: []corev1.KeyToPath{
+ {
+ Key: "key1",
+ Path: "secondary-key1",
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ }
+}
diff --git a/test/loadtest/manifests/prometheus.yaml b/test/loadtest/manifests/prometheus.yaml
new file mode 100644
index 0000000..f826f52
--- /dev/null
+++ b/test/loadtest/manifests/prometheus.yaml
@@ -0,0 +1,181 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: prometheus-config
+ namespace: monitoring
+data:
+ prometheus.yml: |
+ global:
+ scrape_interval: 2s
+ evaluation_interval: 2s
+
+ scrape_configs:
+ - job_name: 'prometheus'
+ static_configs:
+ - targets: ['localhost:9090']
+
+ - job_name: 'reloader-old'
+ kubernetes_sd_configs:
+ - role: pod
+ namespaces:
+ names:
+ - reloader-old
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+ action: replace
+ regex: ([^:]+)(?::\d+)?;(\d+)
+ replacement: $1:$2
+ target_label: __address__
+ - action: labelmap
+ regex: __meta_kubernetes_pod_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_pod_name]
+ action: replace
+ target_label: kubernetes_pod_name
+
+ - job_name: 'reloader-new'
+ kubernetes_sd_configs:
+ - role: pod
+ namespaces:
+ names:
+ - reloader-new
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+ action: replace
+ regex: ([^:]+)(?::\d+)?;(\d+)
+ replacement: $1:$2
+ target_label: __address__
+ - action: labelmap
+ regex: __meta_kubernetes_pod_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_pod_name]
+ action: replace
+ target_label: kubernetes_pod_name
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: prometheus
+ namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: prometheus
+rules:
+ - apiGroups: [""]
+ resources:
+ - nodes
+ - nodes/proxy
+ - services
+ - endpoints
+ - pods
+ verbs: ["get", "list", "watch"]
+ - apiGroups: [""]
+ resources:
+ - configmaps
+ verbs: ["get"]
+ - nonResourceURLs: ["/metrics"]
+ verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: prometheus
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: prometheus
+subjects:
+ - kind: ServiceAccount
+ name: prometheus
+ namespace: monitoring
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: prometheus
+ namespace: monitoring
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: prometheus
+ template:
+ metadata:
+ labels:
+ app: prometheus
+ spec:
+ serviceAccountName: prometheus
+ containers:
+ - name: prometheus
+ image: quay.io/prometheus/prometheus:v2.47.0
+ args:
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --storage.tsdb.path=/prometheus
+ - --web.console.libraries=/usr/share/prometheus/console_libraries
+ - --web.console.templates=/usr/share/prometheus/consoles
+ - --web.enable-lifecycle
+ ports:
+ - containerPort: 9090
+ volumeMounts:
+ - name: config
+ mountPath: /etc/prometheus
+ - name: data
+ mountPath: /prometheus
+ resources:
+ limits:
+ cpu: 1000m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 512Mi
+ readinessProbe:
+ httpGet:
+ path: /-/ready
+ port: 9090
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ livenessProbe:
+ httpGet:
+ path: /-/healthy
+ port: 9090
+ initialDelaySeconds: 10
+ periodSeconds: 10
+ volumes:
+ - name: config
+ configMap:
+ name: prometheus-config
+ - name: data
+ emptyDir: {}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: prometheus
+ namespace: monitoring
+spec:
+ selector:
+ app: prometheus
+ ports:
+ - port: 9090
+ targetPort: 9090
+ type: NodePort