diff --git a/.github/actions/loadtest/action.yml b/.github/actions/loadtest/action.yml new file mode 100644 index 0000000..3f71ae9 --- /dev/null +++ b/.github/actions/loadtest/action.yml @@ -0,0 +1,267 @@ +name: 'Reloader Load Test' +description: 'Run Reloader load tests with A/B comparison support' + +inputs: + old-ref: + description: 'Git ref for "old" version (optional, enables A/B comparison)' + required: false + default: '' + new-ref: + description: 'Git ref for "new" version (defaults to current checkout)' + required: false + default: '' + old-image: + description: 'Pre-built container image for "old" version (alternative to old-ref)' + required: false + default: '' + new-image: + description: 'Pre-built container image for "new" version (alternative to new-ref)' + required: false + default: '' + scenarios: + description: 'Scenarios to run: S1,S4,S6 or all' + required: false + default: 'S1,S4,S6' + test-type: + description: 'Test type label for summary: quick or full' + required: false + default: 'quick' + duration: + description: 'Test duration in seconds' + required: false + default: '60' + kind-cluster: + description: 'Name of existing Kind cluster (if empty, creates new one)' + required: false + default: '' + post-comment: + description: 'Post results as PR comment' + required: false + default: 'false' + pr-number: + description: 'PR number for commenting (required if post-comment is true)' + required: false + default: '' + github-token: + description: 'GitHub token for posting comments' + required: false + default: ${{ github.token }} + comment-header: + description: 'Optional header text for the comment' + required: false + default: '' + +outputs: + status: + description: 'Overall test status: pass or fail' + value: ${{ steps.run.outputs.status }} + summary: + description: 'Markdown summary of results' + value: ${{ steps.summary.outputs.summary }} + pass-count: + description: 'Number of passed scenarios' + value: ${{ steps.summary.outputs.pass_count }} + fail-count: + description: 'Number of failed scenarios' + value: ${{ steps.summary.outputs.fail_count }} + +runs: + using: 'composite' + steps: + - name: Determine images to use + id: images + shell: bash + run: | + # Determine old image + if [ -n "${{ inputs.old-image }}" ]; then + echo "old=${{ inputs.old-image }}" >> $GITHUB_OUTPUT + elif [ -n "${{ inputs.old-ref }}" ]; then + echo "old=localhost/reloader:old" >> $GITHUB_OUTPUT + echo "build_old=true" >> $GITHUB_OUTPUT + else + echo "old=" >> $GITHUB_OUTPUT + fi + + # Determine new image + if [ -n "${{ inputs.new-image }}" ]; then + echo "new=${{ inputs.new-image }}" >> $GITHUB_OUTPUT + elif [ -n "${{ inputs.new-ref }}" ]; then + echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT + echo "build_new=true" >> $GITHUB_OUTPUT + else + # Default: build from current checkout + echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT + echo "build_new_current=true" >> $GITHUB_OUTPUT + fi + + - name: Build old image from ref + if: steps.images.outputs.build_old == 'true' + shell: bash + run: | + CURRENT_SHA=$(git rev-parse HEAD) + git checkout ${{ inputs.old-ref }} + docker build -t localhost/reloader:old . + echo "Built old image from ref: ${{ inputs.old-ref }}" + git checkout $CURRENT_SHA + + - name: Build new image from ref + if: steps.images.outputs.build_new == 'true' + shell: bash + run: | + CURRENT_SHA=$(git rev-parse HEAD) + git checkout ${{ inputs.new-ref }} + docker build -t localhost/reloader:new . 
+ echo "Built new image from ref: ${{ inputs.new-ref }}" + git checkout $CURRENT_SHA + + - name: Build new image from current checkout + if: steps.images.outputs.build_new_current == 'true' + shell: bash + run: | + docker build -t localhost/reloader:new . + echo "Built new image from current checkout" + + - name: Build loadtest binary + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + go build -o loadtest ./cmd/loadtest + + - name: Determine cluster name + id: cluster + shell: bash + run: | + if [ -n "${{ inputs.kind-cluster }}" ]; then + echo "name=${{ inputs.kind-cluster }}" >> $GITHUB_OUTPUT + echo "skip=true" >> $GITHUB_OUTPUT + else + echo "name=reloader-loadtest" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + fi + + - name: Load images into Kind + shell: bash + run: | + CLUSTER="${{ steps.cluster.outputs.name }}" + + if [ -n "${{ steps.images.outputs.old }}" ]; then + echo "Loading old image: ${{ steps.images.outputs.old }}" + kind load docker-image "${{ steps.images.outputs.old }}" --name "$CLUSTER" || true + fi + + echo "Loading new image: ${{ steps.images.outputs.new }}" + kind load docker-image "${{ steps.images.outputs.new }}" --name "$CLUSTER" || true + + - name: Run load tests + id: run + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + + ARGS="--new-image=${{ steps.images.outputs.new }}" + ARGS="$ARGS --scenario=${{ inputs.scenarios }}" + ARGS="$ARGS --duration=${{ inputs.duration }}" + ARGS="$ARGS --cluster-name=${{ steps.cluster.outputs.name }}" + ARGS="$ARGS --skip-image-load" + + if [ -n "${{ steps.images.outputs.old }}" ]; then + ARGS="$ARGS --old-image=${{ steps.images.outputs.old }}" + fi + + if [ "${{ steps.cluster.outputs.skip }}" = "true" ]; then + ARGS="$ARGS --skip-cluster" + fi + + echo "Running: ./loadtest run $ARGS" + if ./loadtest run $ARGS; then + echo "status=pass" >> $GITHUB_OUTPUT + else + echo "status=fail" >> $GITHUB_OUTPUT + fi + + - name: Generate summary + id: summary + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + + # Generate markdown summary + ./loadtest summary \ + --results-dir=./results \ + --test-type=${{ inputs.test-type }} \ + --format=markdown > summary.md 2>/dev/null || true + + # Output to GitHub Step Summary + cat summary.md >> $GITHUB_STEP_SUMMARY + + # Store summary for output (using heredoc for multiline) + { + echo 'summary<> $GITHUB_OUTPUT + + # Get pass/fail counts from JSON + COUNTS=$(./loadtest summary --format=json 2>/dev/null | head -20 || echo '{}') + echo "pass_count=$(echo "$COUNTS" | grep -o '"pass_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT + echo "fail_count=$(echo "$COUNTS" | grep -o '"fail_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT + + - name: Post PR comment + if: inputs.post-comment == 'true' && inputs.pr-number != '' + continue-on-error: true + uses: actions/github-script@v7 + with: + github-token: ${{ inputs.github-token }} + script: | + const fs = require('fs'); + const summaryPath = '${{ github.workspace }}/test/loadtest/summary.md'; + let summary = 'No results available'; + try { + summary = fs.readFileSync(summaryPath, 'utf8'); + } catch (e) { + console.log('Could not read summary file:', e.message); + } + + const header = '${{ inputs.comment-header }}'; + const status = '${{ steps.run.outputs.status }}'; + const statusEmoji = status === 'pass' ? ':white_check_mark:' : ':x:'; + + const body = [ + header ? 
header : `## ${statusEmoji} Load Test Results (${{ inputs.test-type }})`, + '', + summary, + '', + '---', + `**Artifacts:** [Download](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})`, + ].join('\n'); + + try { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr-number }}, + body: body + }); + console.log('Comment posted successfully'); + } catch (error) { + if (error.status === 403) { + console.log('Could not post comment (fork PR with restricted permissions). Use /loadtest command to run with comment posting.'); + } else { + throw error; + } + } + + - name: Upload results + uses: actions/upload-artifact@v4 + if: always() + with: + name: loadtest-${{ inputs.test-type }}-results + path: | + ${{ github.workspace }}/test/loadtest/results/ + retention-days: 30 + + - name: Cleanup Kind cluster (only if we created it) + if: always() && steps.cluster.outputs.skip == 'false' + shell: bash + run: | + kind delete cluster --name ${{ steps.cluster.outputs.name }} || true diff --git a/.github/workflows/loadtest.yml b/.github/workflows/loadtest.yml new file mode 100644 index 0000000..c997e13 --- /dev/null +++ b/.github/workflows/loadtest.yml @@ -0,0 +1,112 @@ +name: Load Test (Full) + +on: + issue_comment: + types: [created] + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + loadtest: + # Only run on PR comments with /loadtest command + if: | + github.event.issue.pull_request && + contains(github.event.comment.body, '/loadtest') + runs-on: ubuntu-latest + + steps: + - name: Add reaction to comment + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'rocket' + }); + + - name: Get PR details + id: pr + uses: actions/github-script@v7 + with: + script: | + const pr = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number + }); + core.setOutput('head_ref', pr.data.head.ref); + core.setOutput('head_sha', pr.data.head.sha); + core.setOutput('base_ref', pr.data.base.ref); + core.setOutput('base_sha', pr.data.base.sha); + console.log(`PR #${context.issue.number}: ${pr.data.head.ref} -> ${pr.data.base.ref}`); + + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + ref: ${{ steps.pr.outputs.head_sha }} + fetch-depth: 0 # Full history for building from base ref + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: false + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install kind + run: | + curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64 + chmod +x ./kind + sudo mv ./kind /usr/local/bin/kind + + - name: Install kubectl + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + sudo mv kubectl /usr/local/bin/kubectl + + - name: Run full A/B comparison load test + id: loadtest + uses: ./.github/actions/loadtest + with: + old-ref: ${{ steps.pr.outputs.base_sha }} + new-ref: ${{ steps.pr.outputs.head_sha }} + scenarios: 'all' + test-type: 'full' + post-comment: 'true' + pr-number: ${{ github.event.issue.number }} + comment-header: | + ## Load Test Results (Full A/B Comparison) + **Comparing:** `${{ steps.pr.outputs.base_ref }}` → `${{ steps.pr.outputs.head_ref }}` + 
**Triggered by:** @${{ github.event.comment.user.login }} + + - name: Add success reaction + if: steps.loadtest.outputs.status == 'pass' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: '+1' + }); + + - name: Add failure reaction + if: steps.loadtest.outputs.status == 'fail' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: '-1' + }); diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index e4b1c6f..c428826 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -35,6 +35,8 @@ jobs: permissions: contents: read + pull-requests: write + issues: write runs-on: ubuntu-latest name: Build @@ -109,6 +111,17 @@ jobs: - name: Test run: make test + - name: Run quick A/B load tests + uses: ./.github/actions/loadtest + with: + old-ref: ${{ github.event.pull_request.base.sha }} + # new-ref defaults to current checkout (PR branch) + scenarios: 'S1,S4,S6' + test-type: 'quick' + kind-cluster: 'kind' # Use the existing cluster created above + post-comment: 'true' + pr-number: ${{ github.event.pull_request.number }} + - name: Generate Tags id: generate_tag run: | diff --git a/.gitignore b/.gitignore index 73da63e..3f28c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,9 +11,14 @@ vendor dist Reloader !**/chart/reloader +!**/internal/reloader *.tgz styles/ site/ /mkdocs.yml yq -bin \ No newline at end of file +bin +test/loadtest/results +test/loadtest/loadtest +# Temporary NFS files +.nfs* diff --git a/Makefile b/Makefile index 8444e1f..8c0aed8 100644 --- a/Makefile +++ b/Makefile @@ -169,3 +169,43 @@ yq-install: @curl -sL $(YQ_DOWNLOAD_URL) -o $(YQ_BIN) @chmod +x $(YQ_BIN) @echo "yq $(YQ_VERSION) installed at $(YQ_BIN)" + +# ============================================================================= +# Load Testing +# ============================================================================= + +LOADTEST_BIN = test/loadtest/loadtest +LOADTEST_OLD_IMAGE ?= localhost/reloader:old +LOADTEST_NEW_IMAGE ?= localhost/reloader:new +LOADTEST_DURATION ?= 60 +LOADTEST_SCENARIOS ?= all + +.PHONY: loadtest-build loadtest-quick loadtest-full loadtest loadtest-clean + +loadtest-build: ## Build loadtest binary + cd test/loadtest && $(GOCMD) build -o loadtest ./cmd/loadtest + +loadtest-quick: loadtest-build ## Run quick load tests (S1, S4, S6) + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=S1,S4,S6 \ + --duration=$(LOADTEST_DURATION) + +loadtest-full: loadtest-build ## Run full load test suite + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=all \ + --duration=$(LOADTEST_DURATION) + +loadtest: loadtest-build ## Run load tests with configurable scenarios (default: all) + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=$(LOADTEST_SCENARIOS) \ + --duration=$(LOADTEST_DURATION) + +loadtest-clean: ## Clean loadtest binary and results + rm -f $(LOADTEST_BIN) + rm -rf test/loadtest/results diff --git a/internal/pkg/controller/controller.go b/internal/pkg/controller/controller.go index 
519923e..1a51d9a 100644 --- a/internal/pkg/controller/controller.go +++ b/internal/pkg/controller/controller.go @@ -110,6 +110,7 @@ func NewController( // Add function to add a new object to the queue in case of creating a resource func (c *Controller) Add(obj interface{}) { + c.collectors.RecordEventReceived("add", c.resource) switch object := obj.(type) { case *v1.Namespace: @@ -121,11 +122,14 @@ func (c *Controller) Add(obj interface{}) { if options.ReloadOnCreate == "true" { if !c.resourceInIgnoredNamespace(obj) && c.resourceInSelectedNamespaces(obj) && secretControllerInitialized && configmapControllerInitialized { - c.queue.Add(handler.ResourceCreatedHandler{ - Resource: obj, - Collectors: c.collectors, - Recorder: c.recorder, + c.enqueue(handler.ResourceCreatedHandler{ + Resource: obj, + Collectors: c.collectors, + Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } } @@ -181,34 +185,44 @@ func (c *Controller) removeSelectedNamespaceFromCache(namespace v1.Namespace) { // Update function to add an old object and a new object to the queue in case of updating a resource func (c *Controller) Update(old interface{}, new interface{}) { + c.collectors.RecordEventReceived("update", c.resource) + switch new.(type) { case *v1.Namespace: return } if !c.resourceInIgnoredNamespace(new) && c.resourceInSelectedNamespaces(new) { - c.queue.Add(handler.ResourceUpdatedHandler{ + c.enqueue(handler.ResourceUpdatedHandler{ Resource: new, OldResource: old, Collectors: c.collectors, Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } // Delete function to add an object to the queue in case of deleting a resource func (c *Controller) Delete(old interface{}) { + c.collectors.RecordEventReceived("delete", c.resource) + if _, ok := old.(*csiv1.SecretProviderClassPodStatus); ok { return } if options.ReloadOnDelete == "true" { if !c.resourceInIgnoredNamespace(old) && c.resourceInSelectedNamespaces(old) && secretControllerInitialized && configmapControllerInitialized { - c.queue.Add(handler.ResourceDeleteHandler{ - Resource: old, - Collectors: c.collectors, - Recorder: c.recorder, + c.enqueue(handler.ResourceDeleteHandler{ + Resource: old, + Collectors: c.collectors, + Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } @@ -219,6 +233,13 @@ func (c *Controller) Delete(old interface{}) { } } +// enqueue adds an item to the queue and records metrics +func (c *Controller) enqueue(item interface{}) { + c.queue.Add(item) + c.collectors.RecordQueueAdd() + c.collectors.SetQueueDepth(c.queue.Len()) +} + // Run function for controller which handles the queue func (c *Controller) Run(threadiness int, stopCh chan struct{}) { defer runtime.HandleCrash() @@ -260,13 +281,34 @@ func (c *Controller) processNextItem() bool { if quit { return false } + + c.collectors.SetQueueDepth(c.queue.Len()) + // Tell the queue that we are done with processing this key. This unblocks the key for other workers // This allows safe parallel processing because two events with the same key are never processed in // parallel. 
defer c.queue.Done(resourceHandler) + // Record queue latency if the handler supports it + if h, ok := resourceHandler.(handler.TimedHandler); ok { + queueLatency := time.Since(h.GetEnqueueTime()) + c.collectors.RecordQueueLatency(queueLatency) + } + + // Track reconcile/handler duration + startTime := time.Now() + // Invoke the method containing the business logic err := resourceHandler.(handler.ResourceHandler).Handle() + + duration := time.Since(startTime) + + if err != nil { + c.collectors.RecordReconcile("error", duration) + } else { + c.collectors.RecordReconcile("success", duration) + } + // Handle the error if something went wrong during the execution of the business logic c.handleErr(err, resourceHandler) return true @@ -279,16 +321,26 @@ func (c *Controller) handleErr(err error, key interface{}) { // This ensures that future processing of updates for this key is not delayed because of // an outdated error history. c.queue.Forget(key) + + // Record successful event processing + c.collectors.RecordEventProcessed("unknown", c.resource, "success") return } + // Record error + c.collectors.RecordError("handler_error") + // This controller retries 5 times if something goes wrong. After that, it stops trying. if c.queue.NumRequeues(key) < 5 { logrus.Errorf("Error syncing events: %v", err) + // Record retry + c.collectors.RecordRetry() + // Re-enqueue the key rate limited. Based on the rate limiter on the // queue and the re-enqueue history, the key will be processed later again. c.queue.AddRateLimited(key) + c.collectors.SetQueueDepth(c.queue.Len()) return } @@ -297,6 +349,8 @@ func (c *Controller) handleErr(err error, key interface{}) { runtime.HandleError(err) logrus.Errorf("Dropping key out of the queue: %v", err) logrus.Debugf("Dropping the key %q out of the queue: %v", key, err) + + c.collectors.RecordEventProcessed("unknown", c.resource, "dropped") } func getClientForResource(resource string, coreClient kubernetes.Interface) (cache.Getter, error) { diff --git a/internal/pkg/controller/controller_test.go b/internal/pkg/controller/controller_test.go index 778b38d..c7eed63 100644 --- a/internal/pkg/controller/controller_test.go +++ b/internal/pkg/controller/controller_test.go @@ -2582,19 +2582,21 @@ func TestController_resourceInIgnoredNamespace(t *testing.T) { }, } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := &Controller{ - client: tt.fields.client, - indexer: tt.fields.indexer, - queue: tt.fields.queue, - informer: tt.fields.informer, - namespace: tt.fields.namespace, - ignoredNamespaces: tt.fields.ignoredNamespaces, - } - if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want { - t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want) - } - }) + t.Run( + tt.name, func(t *testing.T) { + c := &Controller{ + client: tt.fields.client, + indexer: tt.fields.indexer, + queue: tt.fields.queue, + informer: tt.fields.informer, + namespace: tt.fields.namespace, + ignoredNamespaces: tt.fields.ignoredNamespaces, + } + if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want { + t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want) + } + }, + ) } } @@ -2756,35 +2758,37 @@ func TestController_resourceInNamespaceSelector(t *testing.T) { } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fakeClient := fake.NewClientset() - namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{}) - logrus.Infof("created fakeClient 
namespace for testing = %s", namespace.Name) + t.Run( + tt.name, func(t *testing.T) { + fakeClient := fake.NewClientset() + namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{}) + logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name) - c := &Controller{ - client: fakeClient, - indexer: tt.fields.indexer, - queue: tt.fields.queue, - informer: tt.fields.informer, - namespace: tt.fields.namespace.Name, - namespaceSelector: tt.fields.namespaceSelector, - } + c := &Controller{ + client: fakeClient, + indexer: tt.fields.indexer, + queue: tt.fields.queue, + informer: tt.fields.informer, + namespace: tt.fields.namespace.Name, + namespaceSelector: tt.fields.namespaceSelector, + } - listOptions := metav1.ListOptions{} - listOptions.LabelSelector = tt.fields.namespaceSelector - namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions) + listOptions := metav1.ListOptions{} + listOptions.LabelSelector = tt.fields.namespaceSelector + namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions) - for _, ns := range namespaces.Items { - c.addSelectedNamespaceToCache(ns) - } + for _, ns := range namespaces.Items { + c.addSelectedNamespaceToCache(ns) + } - if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want { - t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want) - } + if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want { + t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want) + } - for _, ns := range namespaces.Items { - c.removeSelectedNamespaceFromCache(ns) - } - }) + for _, ns := range namespaces.Items { + c.removeSelectedNamespaceFromCache(ns) + } + }, + ) } } diff --git a/internal/pkg/handler/create.go b/internal/pkg/handler/create.go index fab7378..d676610 100644 --- a/internal/pkg/handler/create.go +++ b/internal/pkg/handler/create.go @@ -1,6 +1,8 @@ package handler import ( + "time" + "github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" @@ -11,25 +13,46 @@ import ( // ResourceCreatedHandler contains new objects type ResourceCreatedHandler struct { - Resource interface{} - Collectors metrics.Collectors - Recorder record.EventRecorder + Resource interface{} + Collectors metrics.Collectors + Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceCreatedHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes the newly created resource func (r ResourceCreatedHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil { logrus.Errorf("Resource creation handler received nil resource") - } else { - config, _ := r.GetConfig() - // Send webhook - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + return nil } - return nil + + config, _ := r.GetConfig() + // Send webhook + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + 
err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + if err == nil { + result = "success" + } + return err } // GetConfig gets configurations containing SHA, annotations, namespace and resource name diff --git a/internal/pkg/handler/delete.go b/internal/pkg/handler/delete.go index 65c671e..34e032b 100644 --- a/internal/pkg/handler/delete.go +++ b/internal/pkg/handler/delete.go @@ -3,6 +3,7 @@ package handler import ( "fmt" "slices" + "time" "github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/callbacks" @@ -20,25 +21,46 @@ import ( // ResourceDeleteHandler contains new objects type ResourceDeleteHandler struct { - Resource interface{} - Collectors metrics.Collectors - Recorder record.EventRecorder + Resource interface{} + Collectors metrics.Collectors + Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceDeleteHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes resources being deleted func (r ResourceDeleteHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil { logrus.Errorf("Resource delete handler received nil resource") - } else { - config, _ := r.GetConfig() - // Send webhook - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy) + return nil } - return nil + + config, _ := r.GetConfig() + // Send webhook + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy) + if err == nil { + result = "success" + } + return err } // GetConfig gets configurations containing SHA, annotations, namespace and resource name diff --git a/internal/pkg/handler/handler.go b/internal/pkg/handler/handler.go index 1f5858e..9018f80 100644 --- a/internal/pkg/handler/handler.go +++ b/internal/pkg/handler/handler.go @@ -1,9 +1,18 @@ package handler -import "github.com/stakater/Reloader/pkg/common" +import ( + "time" + + "github.com/stakater/Reloader/pkg/common" +) // ResourceHandler handles the creation and update of resources type ResourceHandler interface { Handle() error GetConfig() (common.Config, string) } + +// TimedHandler is a handler that tracks when it was enqueued +type TimedHandler interface { + GetEnqueueTime() time.Time +} diff --git a/internal/pkg/handler/update.go b/internal/pkg/handler/update.go index 25a4380..3fde98e 100644 --- a/internal/pkg/handler/update.go +++ b/internal/pkg/handler/update.go @@ -1,6 +1,8 @@ package handler import ( + "time" + "github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" @@ -17,23 +19,49 @@ type ResourceUpdatedHandler struct { OldResource interface{} Collectors metrics.Collectors Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceUpdatedHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes the updated resource func (r 
ResourceUpdatedHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil || r.OldResource == nil { logrus.Errorf("Resource update handler received nil resource") - } else { - config, oldSHAData := r.GetConfig() - if config.SHAValue != oldSHAData { - // Send a webhook if update - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) - } + return nil } + + config, oldSHAData := r.GetConfig() + if config.SHAValue != oldSHAData { + // Send a webhook if update + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + if err == nil { + result = "success" + } + return err + } + + // No data change - skip + result = "skipped" + r.Collectors.RecordSkipped("no_data_change") return nil } diff --git a/internal/pkg/handler/upgrade.go b/internal/pkg/handler/upgrade.go index 6d63d5c..982dbfa 100644 --- a/internal/pkg/handler/upgrade.go +++ b/internal/pkg/handler/upgrade.go @@ -9,6 +9,7 @@ import ( "io" "os" "strings" + "time" "github.com/parnurzeal/gorequest" "github.com/prometheus/client_golang/prometheus" @@ -239,23 +240,35 @@ func rollingUpgrade(clients kube.Clients, config common.Config, upgradeFuncs cal func PerformAction(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy) error { items := upgradeFuncs.ItemsFunc(clients, config.Namespace) + // Record workloads scanned + collectors.RecordWorkloadsScanned(upgradeFuncs.ResourceType, len(items)) + + matchedCount := 0 for _, item := range items { - err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) error { + matched, err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) (bool, error) { return upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource) }) if err != nil { return err } + if matched { + matchedCount++ + } } + // Record workloads matched + collectors.RecordWorkloadsMatched(upgradeFuncs.ResourceType, matchedCount) + return nil } -func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error { +func retryOnConflict(backoff wait.Backoff, fn func(_ bool) (bool, error)) (bool, error) { var lastError error + var matched bool fetchResource := false // do not fetch resource on first attempt, already done by ItemsFunc err := wait.ExponentialBackoff(backoff, func() (bool, error) { - err := fn(fetchResource) + var err error + matched, err = fn(fetchResource) fetchResource = true switch { case err == nil: @@ -270,20 +283,22 @@ func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error { if wait.Interrupted(err) { err = lastError } - return err + return matched, err } -func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) error { +func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, 
recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) (bool, error) { + actionStartTime := time.Now() + accessor, err := meta.Accessor(resource) if err != nil { - return err + return false, err } resourceName := accessor.GetName() if fetchResource { resource, err = upgradeFuncs.ItemFunc(clients, resourceName, config.Namespace) if err != nil { - return err + return false, err } } if config.Type == constants.SecretProviderClassEnvVarPostfix { @@ -296,13 +311,14 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca if !result.ShouldReload { logrus.Debugf("No changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace) - return nil + return false, nil } strategyResult := strategy(upgradeFuncs, resource, config, result.AutoReload) if strategyResult.Result != constants.Updated { - return nil + collectors.RecordSkipped("strategy_not_updated") + return false, nil } // find correct annotation and update the resource @@ -316,7 +332,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca _, err = PauseDeployment(deployment, clients, config.Namespace, pauseInterval) if err != nil { logrus.Errorf("Failed to pause deployment '%s' in namespace '%s': %v", resourceName, config.Namespace, err) - return err + return true, err } } } @@ -327,16 +343,19 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca err = upgradeFuncs.UpdateFunc(clients, config.Namespace, resource) } + actionLatency := time.Since(actionStartTime) + if err != nil { message := fmt.Sprintf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err) logrus.Errorf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err) collectors.Reloaded.With(prometheus.Labels{"success": "false"}).Inc() collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "false", "namespace": config.Namespace}).Inc() + collectors.RecordAction(upgradeFuncs.ResourceType, "error", actionLatency) if recorder != nil { recorder.Event(resource, v1.EventTypeWarning, "ReloadFail", message) } - return err + return true, err } else { message := fmt.Sprintf("Changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace) message += fmt.Sprintf(", Updated '%s' of type '%s' in namespace '%s'", resourceName, upgradeFuncs.ResourceType, config.Namespace) @@ -345,6 +364,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca collectors.Reloaded.With(prometheus.Labels{"success": "true"}).Inc() collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "true", "namespace": config.Namespace}).Inc() + collectors.RecordAction(upgradeFuncs.ResourceType, "success", actionLatency) alert_on_reload, ok := os.LookupEnv("ALERT_ON_RELOAD") if recorder != nil { recorder.Event(resource, v1.EventTypeNormal, "Reloaded", message) @@ -357,7 +377,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca } } - return nil + return true, nil } func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string) string { diff --git a/internal/pkg/metrics/prometheus.go b/internal/pkg/metrics/prometheus.go index 94153ea..4310393 100644 --- a/internal/pkg/metrics/prometheus.go +++ b/internal/pkg/metrics/prometheus.go @@ -1,54 +1,390 @@ package metrics 
import ( + "context" "net/http" + "net/url" "os" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + "k8s.io/client-go/tools/metrics" ) +// clientGoRequestMetrics implements metrics.LatencyMetric and metrics.ResultMetric +// to expose client-go's rest_client_requests_total metric +type clientGoRequestMetrics struct { + requestCounter *prometheus.CounterVec + requestLatency *prometheus.HistogramVec +} + +func (m *clientGoRequestMetrics) Increment(ctx context.Context, code string, method string, host string) { + m.requestCounter.WithLabelValues(code, method, host).Inc() +} + +func (m *clientGoRequestMetrics) Observe(ctx context.Context, verb string, u url.URL, latency time.Duration) { + m.requestLatency.WithLabelValues(verb, u.Host).Observe(latency.Seconds()) +} + +var clientGoMetrics = &clientGoRequestMetrics{ + requestCounter: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rest_client_requests_total", + Help: "Number of HTTP requests, partitioned by status code, method, and host.", + }, + []string{"code", "method", "host"}, + ), + requestLatency: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_request_duration_seconds", + Help: "Request latency in seconds. Broken down by verb and host.", + Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30}, + }, + []string{"verb", "host"}, + ), +} + +func init() { + // Register the metrics collectors + prometheus.MustRegister(clientGoMetrics.requestCounter) + prometheus.MustRegister(clientGoMetrics.requestLatency) + + // Register our metrics implementation with client-go + metrics.RequestResult = clientGoMetrics + metrics.RequestLatency = clientGoMetrics +} + +// Collectors holds all Prometheus metrics collectors for Reloader. type Collectors struct { Reloaded *prometheus.CounterVec ReloadedByNamespace *prometheus.CounterVec + countByNamespace bool + + ReconcileTotal *prometheus.CounterVec // Total reconcile calls by result + ReconcileDuration *prometheus.HistogramVec // Time spent in reconcile/handler + ActionTotal *prometheus.CounterVec // Total actions by workload kind and result + ActionLatency *prometheus.HistogramVec // Time from event to action applied + SkippedTotal *prometheus.CounterVec // Skipped operations by reason + QueueDepth prometheus.Gauge // Current queue depth + QueueAdds prometheus.Counter // Total items added to queue + QueueLatency *prometheus.HistogramVec // Time spent in queue + ErrorsTotal *prometheus.CounterVec // Errors by type + RetriesTotal prometheus.Counter // Total retries + EventsReceived *prometheus.CounterVec // Events received by type (add/update/delete) + EventsProcessed *prometheus.CounterVec // Events processed by type and result + WorkloadsScanned *prometheus.CounterVec // Workloads scanned by kind + WorkloadsMatched *prometheus.CounterVec // Workloads matched for reload by kind +} + +// RecordReload records a reload event with the given success status and namespace. +// Preserved for backward compatibility. +func (c *Collectors) RecordReload(success bool, namespace string) { + if c == nil { + return + } + + successLabel := "false" + if success { + successLabel = "true" + } + + c.Reloaded.With(prometheus.Labels{"success": successLabel}).Inc() + + if c.countByNamespace { + c.ReloadedByNamespace.With(prometheus.Labels{ + "success": successLabel, + "namespace": namespace, + }).Inc() + } +} + +// RecordReconcile records a reconcile/handler invocation. 
+func (c *Collectors) RecordReconcile(result string, duration time.Duration) { + if c == nil { + return + } + c.ReconcileTotal.With(prometheus.Labels{"result": result}).Inc() + c.ReconcileDuration.With(prometheus.Labels{"result": result}).Observe(duration.Seconds()) +} + +// RecordAction records a reload action on a workload. +func (c *Collectors) RecordAction(workloadKind string, result string, latency time.Duration) { + if c == nil { + return + } + c.ActionTotal.With(prometheus.Labels{"workload_kind": workloadKind, "result": result}).Inc() + c.ActionLatency.With(prometheus.Labels{"workload_kind": workloadKind}).Observe(latency.Seconds()) +} + +// RecordSkipped records a skipped operation with reason. +func (c *Collectors) RecordSkipped(reason string) { + if c == nil { + return + } + c.SkippedTotal.With(prometheus.Labels{"reason": reason}).Inc() +} + +// RecordQueueAdd records an item being added to the queue. +func (c *Collectors) RecordQueueAdd() { + if c == nil { + return + } + c.QueueAdds.Inc() +} + +// SetQueueDepth sets the current queue depth. +func (c *Collectors) SetQueueDepth(depth int) { + if c == nil { + return + } + c.QueueDepth.Set(float64(depth)) +} + +// RecordQueueLatency records how long an item spent in the queue. +func (c *Collectors) RecordQueueLatency(latency time.Duration) { + if c == nil { + return + } + c.QueueLatency.With(prometheus.Labels{}).Observe(latency.Seconds()) +} + +// RecordError records an error by type. +func (c *Collectors) RecordError(errorType string) { + if c == nil { + return + } + c.ErrorsTotal.With(prometheus.Labels{"type": errorType}).Inc() +} + +// RecordRetry records a retry attempt. +func (c *Collectors) RecordRetry() { + if c == nil { + return + } + c.RetriesTotal.Inc() +} + +// RecordEventReceived records an event being received. +func (c *Collectors) RecordEventReceived(eventType string, resourceType string) { + if c == nil { + return + } + c.EventsReceived.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType}).Inc() +} + +// RecordEventProcessed records an event being processed. +func (c *Collectors) RecordEventProcessed(eventType string, resourceType string, result string) { + if c == nil { + return + } + c.EventsProcessed.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType, "result": result}).Inc() +} + +// RecordWorkloadsScanned records workloads scanned during a reconcile. +func (c *Collectors) RecordWorkloadsScanned(kind string, count int) { + if c == nil { + return + } + c.WorkloadsScanned.With(prometheus.Labels{"kind": kind}).Add(float64(count)) +} + +// RecordWorkloadsMatched records workloads matched for reload. 
+func (c *Collectors) RecordWorkloadsMatched(kind string, count int) { + if c == nil { + return + } + c.WorkloadsMatched.With(prometheus.Labels{"kind": kind}).Add(float64(count)) } func NewCollectors() Collectors { + // Existing metrics (preserved) reloaded := prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "reloader", Name: "reload_executed_total", Help: "Counter of reloads executed by Reloader.", }, - []string{ - "success", - }, + []string{"success"}, ) - - //set 0 as default value reloaded.With(prometheus.Labels{"success": "true"}).Add(0) reloaded.With(prometheus.Labels{"success": "false"}).Add(0) - reloaded_by_namespace := prometheus.NewCounterVec( + reloadedByNamespace := prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "reloader", Name: "reload_executed_total_by_namespace", Help: "Counter of reloads executed by Reloader by namespace.", }, - []string{ - "success", - "namespace", + []string{"success", "namespace"}, + ) + + reconcileTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "reconcile_total", + Help: "Total number of reconcile/handler invocations by result.", + }, + []string{"result"}, + ) + + reconcileDuration := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "reconcile_duration_seconds", + Help: "Time spent in reconcile/handler in seconds.", + Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}, + }, + []string{"result"}, + ) + + actionTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "action_total", + Help: "Total number of reload actions by workload kind and result.", + }, + []string{"workload_kind", "result"}, + ) + + actionLatency := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "action_latency_seconds", + Help: "Time from event received to action applied in seconds.", + Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60}, + }, + []string{"workload_kind"}, + ) + + skippedTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "skipped_total", + Help: "Total number of skipped operations by reason.", + }, + []string{"reason"}, + ) + + queueDepth := prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "reloader", + Name: "workqueue_depth", + Help: "Current depth of the work queue.", }, ) + + queueAdds := prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workqueue_adds_total", + Help: "Total number of items added to the work queue.", + }, + ) + + queueLatency := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "workqueue_latency_seconds", + Help: "Time spent in the work queue in seconds.", + Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5}, + }, + []string{}, + ) + + errorsTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "errors_total", + Help: "Total number of errors by type.", + }, + []string{"type"}, + ) + + retriesTotal := prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "retries_total", + Help: "Total number of retry attempts.", + }, + ) + + eventsReceived := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "events_received_total", + Help: "Total number of events received by type and resource.", + }, + []string{"event_type", "resource_type"}, + ) + + eventsProcessed := 
prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "events_processed_total", + Help: "Total number of events processed by type, resource, and result.", + }, + []string{"event_type", "resource_type", "result"}, + ) + + workloadsScanned := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workloads_scanned_total", + Help: "Total number of workloads scanned by kind.", + }, + []string{"kind"}, + ) + + workloadsMatched := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workloads_matched_total", + Help: "Total number of workloads matched for reload by kind.", + }, + []string{"kind"}, + ) + return Collectors{ Reloaded: reloaded, - ReloadedByNamespace: reloaded_by_namespace, + ReloadedByNamespace: reloadedByNamespace, + countByNamespace: os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled", + + ReconcileTotal: reconcileTotal, + ReconcileDuration: reconcileDuration, + ActionTotal: actionTotal, + ActionLatency: actionLatency, + SkippedTotal: skippedTotal, + QueueDepth: queueDepth, + QueueAdds: queueAdds, + QueueLatency: queueLatency, + ErrorsTotal: errorsTotal, + RetriesTotal: retriesTotal, + EventsReceived: eventsReceived, + EventsProcessed: eventsProcessed, + WorkloadsScanned: workloadsScanned, + WorkloadsMatched: workloadsMatched, } } func SetupPrometheusEndpoint() Collectors { collectors := NewCollectors() + prometheus.MustRegister(collectors.Reloaded) + prometheus.MustRegister(collectors.ReconcileTotal) + prometheus.MustRegister(collectors.ReconcileDuration) + prometheus.MustRegister(collectors.ActionTotal) + prometheus.MustRegister(collectors.ActionLatency) + prometheus.MustRegister(collectors.SkippedTotal) + prometheus.MustRegister(collectors.QueueDepth) + prometheus.MustRegister(collectors.QueueAdds) + prometheus.MustRegister(collectors.QueueLatency) + prometheus.MustRegister(collectors.ErrorsTotal) + prometheus.MustRegister(collectors.RetriesTotal) + prometheus.MustRegister(collectors.EventsReceived) + prometheus.MustRegister(collectors.EventsProcessed) + prometheus.MustRegister(collectors.WorkloadsScanned) + prometheus.MustRegister(collectors.WorkloadsMatched) if os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled" { prometheus.MustRegister(collectors.ReloadedByNamespace) diff --git a/pkg/kube/resourcemapper.go b/pkg/kube/resourcemapper.go index 286d408..bdb7858 100644 --- a/pkg/kube/resourcemapper.go +++ b/pkg/kube/resourcemapper.go @@ -8,8 +8,8 @@ import ( // ResourceMap are resources from where changes are going to be detected var ResourceMap = map[string]runtime.Object{ - "configmaps": &v1.ConfigMap{}, - "secrets": &v1.Secret{}, - "namespaces": &v1.Namespace{}, + "configmaps": &v1.ConfigMap{}, + "secrets": &v1.Secret{}, + "namespaces": &v1.Namespace{}, "secretproviderclasspodstatuses": &csiv1.SecretProviderClassPodStatus{}, } diff --git a/test/loadtest/README.md b/test/loadtest/README.md new file mode 100644 index 0000000..7182bb3 --- /dev/null +++ b/test/loadtest/README.md @@ -0,0 +1,544 @@ +# Reloader Load Test Framework + +This framework provides A/B comparison testing between two Reloader container images. + +## Overview + +The load test framework: +1. Creates a local kind cluster (1 control-plane + 6 worker nodes) +2. Deploys Prometheus for metrics collection +3. Loads the provided Reloader container images into the cluster +4. Runs standardized test scenarios (S1-S13) +5. Collects metrics via Prometheus scraping +6. 
Generates comparison reports with pass/fail criteria + +## Prerequisites + +- Docker or Podman +- kind (Kubernetes in Docker) +- kubectl +- Go 1.22+ + +## Building + +```bash +cd test/loadtest +go build -o loadtest ./cmd/loadtest +``` + +## Quick Start + +```bash +# Compare two published images (e.g., different versions) +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 + +# Run a specific scenario +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 \ + --scenario=S2 \ + --duration=120 + +# Test only a single image (no comparison) +./loadtest run --new-image=myregistry/reloader:dev + +# Use local images built with docker/podman +./loadtest run \ + --old-image=localhost/reloader:baseline \ + --new-image=localhost/reloader:feature-branch + +# Skip cluster creation (use existing kind cluster) +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 \ + --skip-cluster + +# Run all scenarios in parallel on 4 clusters (faster execution) +./loadtest run \ + --new-image=localhost/reloader:dev \ + --parallelism=4 + +# Run all 13 scenarios in parallel (one cluster per scenario) +./loadtest run \ + --new-image=localhost/reloader:dev \ + --parallelism=13 + +# Generate report from existing results +./loadtest report --scenario=S2 --results-dir=./results +``` + +## Command Line Options + +### Run Command + +| Option | Description | Default | +|--------|-------------|---------| +| `--old-image=IMAGE` | Container image for "old" version | - | +| `--new-image=IMAGE` | Container image for "new" version | - | +| `--scenario=ID` | Test scenario: S1-S13 or "all" | all | +| `--duration=SECONDS` | Test duration in seconds | 60 | +| `--parallelism=N` | Run N scenarios in parallel on N kind clusters | 1 | +| `--skip-cluster` | Skip kind cluster creation (use existing, only for parallelism=1) | false | +| `--results-dir=DIR` | Directory for results | ./results | + +**Note:** At least one of `--old-image` or `--new-image` is required. Provide both for A/B comparison. + +### Report Command + +| Option | Description | Default | +|--------|-------------|---------| +| `--scenario=ID` | Scenario to report on (required) | - | +| `--results-dir=DIR` | Directory containing results | ./results | +| `--output=FILE` | Output file (default: stdout) | - | + +## Test Scenarios + +| ID | Name | Description | +|-----|-----------------------|-------------------------------------------------| +| S1 | Burst Updates | Many ConfigMap/Secret updates in quick succession | +| S2 | Fan-Out | One ConfigMap used by many (50) workloads | +| S3 | High Cardinality | Many CMs/Secrets across many namespaces | +| S4 | No-Op Updates | Updates that don't change data (annotation only)| +| S5 | Workload Churn | Deployments created/deleted rapidly | +| S6 | Controller Restart | Restart controller pod under load | +| S7 | API Pressure | Many concurrent update requests | +| S8 | Large Objects | ConfigMaps > 100KB | +| S9 | Multi-Workload Types | Tests all workload types (Deploy, STS, DS) | +| S10 | Secrets + Mixed | Secrets and mixed ConfigMap+Secret workloads | +| S11 | Annotation Strategy | Tests `--reload-strategy=annotations` | +| S12 | Pause & Resume | Tests pause-period during rapid updates | +| S13 | Complex References | Init containers, valueFrom, projected volumes | + +## Metrics Reference + +This section explains each metric collected during load tests, what it measures, and what different values might indicate. 
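+
+Before digging into individual metrics, it can help to eyeball the raw counters on a live Reloader pod. The snippet below is a sketch rather than part of the framework; the namespace, pod name, and metrics port are assumptions you will need to adapt to your deployment:
+
+```bash
+# Hypothetical spot-check: dump the raw Reloader counters from a running pod.
+# Namespace, pod name, and metrics port are placeholders - adjust as needed.
+kubectl -n reloader port-forward <reloader-pod> 9090:9090 &
+curl -s http://localhost:9090/metrics | grep -E 'reloader_(reconcile|action|errors|reload_executed)_total'
+```
+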
+ +### Counter Metrics (Totals) + +#### `reconcile_total` +**What it measures:** The total number of reconciliation loops executed by the controller. + +**What it indicates:** +- **Higher in new vs old:** The new controller-runtime implementation may batch events differently. This is often expected behavior, not a problem. +- **Lower in new vs old:** Better event batching/deduplication. Controller-runtime's work queue naturally deduplicates events. +- **Expected behavior:** The new implementation typically has *fewer* reconciles due to intelligent event batching. + +#### `action_total` +**What it measures:** The total number of reload actions triggered (rolling restarts of Deployments/StatefulSets/DaemonSets). + +**What it indicates:** +- **Should match expected value:** Both implementations should trigger the same number of reloads for the same workload. +- **Lower than expected:** Some updates were missed - potential bug or race condition. +- **Higher than expected:** Duplicate reloads triggered - inefficiency but not data loss. + +#### `reload_executed_total` +**What it measures:** Successful reload operations executed, labeled by `success=true/false`. + +**What it indicates:** +- **`success=true` count:** Number of workloads successfully restarted. +- **`success=false` count:** Failed restart attempts (API errors, permission issues). +- **Should match `action_total`:** If significantly lower, reloads are failing. + +#### `workloads_scanned_total` +**What it measures:** Number of workloads (Deployments, etc.) scanned when checking for ConfigMap/Secret references. + +**What it indicates:** +- **High count:** Controller is scanning many workloads per reconcile. +- **Expected behavior:** Should roughly match the number of workloads × number of reconciles. +- **Optimization signal:** If very high, namespace filtering or label selectors could help. + +#### `workloads_matched_total` +**What it measures:** Number of workloads that matched (reference the changed ConfigMap/Secret). + +**What it indicates:** +- **Should match `reload_executed_total`:** Every matched workload should be reloaded. +- **Higher than reloads:** Some matched workloads weren't reloaded (potential issue). + +#### `errors_total` +**What it measures:** Total errors encountered, labeled by error type. + +**What it indicates:** +- **Should be 0:** Any errors indicate problems. +- **Common causes:** API server timeouts, RBAC issues, resource conflicts. +- **Critical metric:** Non-zero errors in production should be investigated. + +### API Efficiency Metrics (REST Client) + +These metrics track Kubernetes API server calls made by Reloader. Lower values indicate more efficient operation with less API server load. + +#### `rest_client_requests_total` +**What it measures:** Total number of HTTP requests made to the Kubernetes API server. + +**What it indicates:** +- **Lower is better:** Fewer API calls means less load on the API server. +- **High count:** May indicate inefficient caching or excessive reconciles. +- **Comparison use:** Shows overall API efficiency between implementations. + +#### `rest_client_requests_get` +**What it measures:** Number of GET requests (fetching individual resources or listings). + +**What it indicates:** +- **Includes:** Fetching ConfigMaps, Secrets, Deployments, etc. +- **Higher count:** More frequent resource fetching, possibly due to cache misses. +- **Expected behavior:** Controller-runtime's caching should reduce GET requests compared to direct API calls. 
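+
+To see where the API calls go, you can break the underlying `rest_client_requests_total` counter down by HTTP method yourself. A sketch, assuming the test Prometheus is port-forwarded on `localhost:9091` (as in the Troubleshooting section) and that a 10-minute window covers your run:
+
+```bash
+# Sketch: per-method API call counts over the test window, grouped by instance
+# so the old and new pods can be told apart. Window and address are assumptions.
+curl -sG http://localhost:9091/api/v1/query \
+  --data-urlencode 'query=sum by (method, instance) (increase(rest_client_requests_total[10m]))'
+```
+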
+ +#### `rest_client_requests_patch` +**What it measures:** Number of PATCH requests (partial updates to resources). + +**What it indicates:** +- **Used for:** Rolling restart annotations on workloads. +- **Should correlate with:** `reload_executed_total` - each reload typically requires one PATCH. +- **Lower is better:** Fewer patches means more efficient batching or deduplication. + +#### `rest_client_requests_put` +**What it measures:** Number of PUT requests (full resource updates). + +**What it indicates:** +- **Used for:** Full object replacements (less common than PATCH). +- **Should be low:** Most updates use PATCH for efficiency. +- **High count:** May indicate suboptimal update strategy. + +#### `rest_client_requests_errors` +**What it measures:** Number of failed API requests (4xx/5xx responses). + +**What it indicates:** +- **Should be 0:** Errors indicate API server issues or permission problems. +- **Common causes:** Rate limiting, RBAC issues, resource conflicts, network issues. +- **Non-zero:** Investigate API server logs and Reloader permissions. + +### Latency Metrics (Percentiles) + +All latency metrics are reported in **seconds**. The report shows p50 (median), p95, and p99 percentiles. + +#### `reconcile_duration (s)` +**What it measures:** Time spent inside each reconcile loop, from start to finish. + +**What it indicates:** +- **p50 (median):** Typical reconcile time. Should be < 100ms for good performance. +- **p95:** 95th percentile - only 5% of reconciles take longer than this. +- **p99:** 99th percentile - indicates worst-case performance. + +**Interpreting differences:** +- **New higher than old:** Controller-runtime reconciles may do more work per loop but run fewer times. Check `reconcile_total` - if it's lower, this is expected. +- **Minor differences (< 0.5s absolute):** Not significant for sub-second values. + +#### `action_latency (s)` +**What it measures:** End-to-end time from ConfigMap/Secret change detection to workload restart triggered. + +**What it indicates:** +- **This is the user-facing latency:** How long users wait for their config changes to take effect. +- **p50 < 1s:** Excellent - most changes apply within a second. +- **p95 < 5s:** Good - even under load, changes apply quickly. +- **p99 > 10s:** May need investigation - some changes take too long. 
+ +**What affects this:** +- API server responsiveness +- Number of workloads to scan +- Concurrent updates competing for resources + +### Understanding the Report + +#### Report Columns + +``` +Metric Old New Expected Old✓ New✓ Status +------ --- --- -------- ---- ---- ------ +action_total 100.00 100.00 100 ✓ ✓ pass +action_latency_p95 (s) 0.15 0.04 - - - pass +``` + +- **Old/New:** Measured values from each implementation +- **Expected:** Known expected value (for throughput metrics) +- **Old✓/New✓:** Whether the value is within 15% of expected (✓ = yes, ✗ = no, - = no expected value) +- **Status:** pass/fail based on comparison thresholds + +#### Pass/Fail Logic + +| Metric Type | Pass Condition | +|-------------|----------------| +| Throughput (action_total, reload_executed_total) | New value within 15% of expected | +| Latency (p50, p95, p99) | New not more than threshold% worse than old, OR absolute difference < minimum threshold | +| Errors | New ≤ Old (ideally both 0) | +| API Efficiency (rest_client_requests_*) | New ≤ Old (lower is better), or New not more than 50% higher | + +#### Latency Thresholds + +Latency comparisons use both percentage AND absolute thresholds to avoid false failures: + +| Metric | Max % Worse | Min Absolute Diff | +|--------|-------------|-------------------| +| p50 | 100% | 0.5s | +| p95 | 100% | 1.0s | +| p99 | 100% | 1.0s | + +**Example:** If old p50 = 0.01s and new p50 = 0.08s: +- Percentage difference: +700% (would fail % check) +- Absolute difference: 0.07s (< 0.5s threshold) +- **Result: PASS** (both values are fast enough that the difference doesn't matter) + +### Resource Consumption Metrics + +These metrics track CPU, memory, and Go runtime resource usage. Lower values generally indicate more efficient operation. 
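+
+The values in the tables below are converted to human-friendly units at report time: memory is reported in MB (binary, 1024 × 1024 bytes) and GC pauses in milliseconds. A trimmed sketch of those conversions, mirroring the report generator's helpers in this change but taking plain numbers instead of Prometheus responses:
+
+```go
+package main
+
+import "fmt"
+
+// bytesToMB converts a byte count (e.g. RSS or Go heap bytes) to megabytes.
+func bytesToMB(bytes float64) float64 {
+	return bytes / (1024 * 1024)
+}
+
+// secondsToMs converts a duration in seconds (e.g. a GC pause p99) to milliseconds.
+func secondsToMs(seconds float64) float64 {
+	return seconds * 1000
+}
+
+func main() {
+	fmt.Printf("memory_rss_mb_max: %.1f MB\n", bytesToMB(268435456)) // 256 MiB sample value
+	fmt.Printf("gc_pause_p99_ms:   %.2f ms\n", secondsToMs(0.0042))  // 4.2 ms sample value
+}
+```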
+ +#### Memory Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `memory_rss_mb_avg` | Average RSS (resident set size) memory | MB | +| `memory_rss_mb_max` | Peak RSS memory during test | MB | +| `memory_heap_mb_avg` | Average Go heap allocation | MB | +| `memory_heap_mb_max` | Peak Go heap allocation | MB | + +**What to watch for:** +- **High RSS:** May indicate memory leaks or inefficient caching +- **High heap:** Many objects being created (check GC metrics) +- **Growing over time:** Potential memory leak + +#### CPU Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `cpu_cores_avg` | Average CPU usage rate | cores | +| `cpu_cores_max` | Peak CPU usage rate | cores | + +**What to watch for:** +- **High CPU:** Inefficient algorithms or excessive reconciles +- **Spiky max:** May indicate burst handling issues + +#### Go Runtime Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `goroutines_avg` | Average goroutine count | count | +| `goroutines_max` | Peak goroutine count | count | +| `gc_pause_p99_ms` | 99th percentile GC pause time | ms | + +**What to watch for:** +- **High goroutines:** Potential goroutine leak or unbounded concurrency +- **High GC pause:** Large heap or allocation pressure + +### Scenario-Specific Expectations + +| Scenario | Key Metrics to Watch | Expected Behavior | +|----------|---------------------|-------------------| +| S1 (Burst) | action_latency_p99, cpu_cores_max, goroutines_max | Should handle bursts without queue backup | +| S2 (Fan-Out) | reconcile_total, workloads_matched, memory_rss_mb_max | One CM change → 50 workload reloads | +| S3 (High Cardinality) | reconcile_duration, memory_heap_mb_avg | Many namespaces shouldn't increase memory | +| S4 (No-Op) | action_total = 0, cpu_cores_avg should be low | Minimal resource usage for no-op | +| S5 (Churn) | errors_total, goroutines_avg | Graceful handling, no goroutine leak | +| S6 (Restart) | All metrics captured | Metrics survive controller restart | +| S7 (API Pressure) | errors_total, cpu_cores_max, goroutines_max | No errors under concurrent load | +| S8 (Large Objects) | memory_rss_mb_max, gc_pause_p99_ms | Large ConfigMaps don't cause OOM or GC issues | +| S9 (Multi-Workload) | reload_executed_total per type | All workload types (Deploy, STS, DS) reload | +| S10 (Secrets) | reload_executed_total, workloads_matched | Both Secrets and ConfigMaps trigger reloads | +| S11 (Annotation) | workload annotations present | Deployments get `last-reloaded-from` annotation | +| S12 (Pause) | reload_executed_total << updates | Pause-period reduces reload frequency | +| S13 (Complex) | reload_executed_total | All reference types trigger reloads | + +### Troubleshooting + +#### New implementation shows 0 for all metrics +- Check if Prometheus is scraping the new Reloader pod +- Verify pod annotations: `prometheus.io/scrape: "true"` +- Check Prometheus targets: `http://localhost:9091/targets` + +#### Metrics don't match expected values +- Verify test ran to completion (check logs) +- Ensure Prometheus scraped final metrics (18s wait after test) +- Check for pod restarts during test (metrics reset on restart - handled by `increase()`) + +#### High latency in new implementation +- Check Reloader pod resource limits +- Look for API server throttling in logs +- Compare `reconcile_total` - fewer reconciles with higher duration may be normal + +#### REST client errors are non-zero +- **Common causes:** + - Optional CRD schemes registered but 
CRDs not installed (e.g., Argo Rollouts, OpenShift DeploymentConfig)
+  - API server rate limiting under high load
+  - RBAC permissions missing for certain resource types
+- **Argo Rollouts errors:** If you see ~4 errors per test and are not using Argo Rollouts, set `--enable-argo-rollouts=false`
+- **OpenShift errors:** Similarly, disable DeploymentConfig support on non-OpenShift clusters
+
+#### REST client requests much higher in new implementation
+- Check if caching is working correctly
+- Look for excessive re-queuing in controller logs
+- Compare `reconcile_total` - more reconciles naturally mean more API calls
+
+## Report Format
+
+The report generator produces a comparison table with units and expected value indicators:
+
+```
+================================================================================
+                        RELOADER A/B COMPARISON REPORT
+================================================================================
+
+Scenario: S2
+Generated: 2026-01-03 14:30:00
+Status: PASS
+Summary: All metrics within acceptable thresholds
+
+Test: S2: Fan-out test - 1 CM update triggers 50 deployment reloads
+
+--------------------------------------------------------------------------------
+ METRIC COMPARISONS
+--------------------------------------------------------------------------------
+(Old✓/New✓ = meets expected value within 15%)
+
+Metric                               Old      New   Expected  Old✓  New✓   Status
+------                               ---      ---   --------  ----  ----   ------
+reconcile_total                    50.00    25.00          -     -     -     pass
+reconcile_duration_p50 (s)          0.01     0.05          -     -     -     pass
+reconcile_duration_p95 (s)          0.02     0.15          -     -     -     pass
+action_total                       50.00    50.00         50     ✓     ✓     pass
+action_latency_p50 (s)              0.05     0.03          -     -     -     pass
+action_latency_p95 (s)              0.12     0.08          -     -     -     pass
+errors_total                        0.00     0.00          -     -     -     pass
+reload_executed_total              50.00    50.00         50     ✓     ✓     pass
+workloads_scanned_total            50.00    50.00         50     ✓     ✓     pass
+workloads_matched_total            50.00    50.00         50     ✓     ✓     pass
+rest_client_requests_total           850      720          -     -     -     pass
+rest_client_requests_get             500      420          -     -     -     pass
+rest_client_requests_patch           300      250          -     -     -     pass
+rest_client_requests_errors            0        0          -     -     -     pass
+```
+
+Reports are saved to `results/<scenario>/report.txt` after each test.
+
+## Directory Structure
+
+```
+test/loadtest/
+├── cmd/
+│   └── loadtest/              # Unified CLI (run + report)
+│       └── main.go
+├── internal/
+│   ├── cluster/               # Kind cluster management
+│   │   └── kind.go
+│   ├── prometheus/            # Prometheus deployment & querying
+│   │   └── prometheus.go
+│   ├── reloader/              # Reloader deployment
+│   │   └── deploy.go
+│   └── scenarios/             # Test scenario implementations
+│       └── scenarios.go
+├── manifests/
+│   └── prometheus.yaml        # Prometheus deployment manifest
+├── results/                   # Generated after tests
+│   └── <scenario>/
+│       ├── old/               # Old version data
+│       │   ├── *.json         # Prometheus metric snapshots
+│       │   └── reloader.log   # Reloader pod logs
+│       ├── new/               # New version data
+│       │   ├── *.json         # Prometheus metric snapshots
+│       │   └── reloader.log   # Reloader pod logs
+│       ├── expected.json      # Expected values from test
+│       └── report.txt         # Comparison report
+├── go.mod
+├── go.sum
+└── README.md
+```
+
+## Building Local Images for Testing
+
+If you want to test local code changes:
+
+```bash
+# Build the new Reloader image from current source
+docker build -t localhost/reloader:dev -f Dockerfile .
+
+# Build from a different branch/commit
+git checkout feature-branch
+docker build -t localhost/reloader:feature -f Dockerfile .
+
+# Then run comparison
+./loadtest run \
+  --old-image=stakater/reloader:v1.0.0 \
+  --new-image=localhost/reloader:feature
+```
+
+## Interpreting Results
+
+### PASS
+All metrics are within acceptable thresholds. The new implementation is comparable to or better than the old one.
+
+### FAIL
+One or more metrics exceeded thresholds. Review the specific metrics:
+- **Latency degradation**: p95/p99 latencies are significantly higher
+- **Missed reloads**: `reload_executed_total` differs significantly
+- **Errors increased**: `errors_total` is higher in the new version
+
+### Investigation
+
+If tests fail, check:
+1. Pod logs: `kubectl logs -n reloader-new deployment/reloader` (or check `results/<scenario>/new/reloader.log`)
+2. Resource usage: `kubectl top pods -n reloader-new`
+3. Events: `kubectl get events -n reloader-test`
+
+## Parallel Execution
+
+The `--parallelism` option enables running scenarios on multiple kind clusters simultaneously, significantly reducing total test time.
+
+### How It Works
+
+1. **Multiple Clusters**: Creates N kind clusters named `reloader-loadtest-0`, `reloader-loadtest-1`, etc.
+2. **Separate Prometheus**: Each cluster gets its own Prometheus instance with a unique port (9091, 9092, etc.)
+3. **Worker Pool**: Scenarios are distributed to workers via a channel, with each worker running on its own cluster
+4. **Independent Execution**: Each scenario runs in complete isolation with no resource contention
+
+### Usage
+
+```bash
+# Run 4 scenarios at a time (creates 4 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=4
+
+# Run all 13 scenarios in parallel (creates 13 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=13 --scenario=all
+```
+
+### Resource Requirements
+
+Parallel execution requires significant system resources:
+
+| Parallelism | Clusters | Est. Memory | Est. CPU    |
+|-------------|----------|-------------|-------------|
+| 1 (default) | 1        | ~4GB        | 2-4 cores   |
+| 4           | 4        | ~16GB       | 8-16 cores  |
+| 13          | 13       | ~52GB       | 26-52 cores |
+
+### Notes
+
+- The `--skip-cluster` option is not supported with parallelism > 1
+- Each worker loads images independently, so initial setup takes longer
+- All results are written to the same `--results-dir` with per-scenario subdirectories
+- If a cluster setup fails, remaining workers continue with available clusters
+- Parallelism is automatically reduced to the scenario count if set higher
+
+## CI Integration
+
+### GitHub Actions
+
+Load tests can be triggered on pull requests by commenting `/loadtest`:
+
+```
+/loadtest
+```
+
+This will:
+1. Build a container image from the PR branch
+2. Run all load test scenarios against it
+3. Post results as a PR comment
+4. Upload detailed results as artifacts
+
+### Make Target
+
+Run load tests locally or in CI:
+
+```bash
+# From repository root
+make loadtest
+```
+
+This builds the container image and runs all scenarios with a 60-second duration.
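+
+### Consuming Results Programmatically
+
+The same data that drives the PR comment is available as JSON via `loadtest report --format=json`, which can be used to gate custom pipelines. A minimal Go sketch (the JSON field names follow the report structures added under `internal/cmd/report.go`; the input file name is illustrative):
+
+```go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+)
+
+// scenarioReport mirrors the subset of the report generator's JSON output
+// that is useful for gating CI.
+type scenarioReport struct {
+	Scenario       string   `json:"scenario"`
+	OverallStatus  string   `json:"overall_status"`
+	Summary        string   `json:"summary"`
+	FailedCriteria []string `json:"failed_criteria"`
+}
+
+func main() {
+	// Illustrative path: a JSON report previously written with
+	// `loadtest report --scenario=S2 --format=json --output=S2.json`.
+	data, err := os.ReadFile("S2.json")
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "read report:", err)
+		os.Exit(1)
+	}
+
+	var r scenarioReport
+	if err := json.Unmarshal(data, &r); err != nil {
+		fmt.Fprintln(os.Stderr, "parse report:", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("%s: %s (%s)\n", r.Scenario, r.OverallStatus, r.Summary)
+	if r.OverallStatus != "PASS" {
+		fmt.Println("failed metrics:", r.FailedCriteria)
+		os.Exit(1)
+	}
+}
+```
+
+The full JSON also includes the per-metric `comparisons` array if finer-grained gating is needed.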
diff --git a/test/loadtest/cmd/loadtest/main.go b/test/loadtest/cmd/loadtest/main.go new file mode 100644 index 0000000..510ce0b --- /dev/null +++ b/test/loadtest/cmd/loadtest/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/stakater/Reloader/test/loadtest/internal/cmd" + +func main() { + cmd.Execute() +} diff --git a/test/loadtest/go.mod b/test/loadtest/go.mod new file mode 100644 index 0000000..e96ed76 --- /dev/null +++ b/test/loadtest/go.mod @@ -0,0 +1,52 @@ +module github.com/stakater/Reloader/test/loadtest + +go 1.25 + +require ( + github.com/spf13/cobra v1.8.1 + k8s.io/api v0.31.0 + k8s.io/apimachinery v0.31.0 + k8s.io/client-go v0.31.0 +) + +require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/test/loadtest/go.sum b/test/loadtest/go.sum new file mode 100644 index 0000000..f4f0ad8 --- /dev/null +++ b/test/loadtest/go.sum @@ -0,0 +1,160 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools 
v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo= +k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE= +k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc= +k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= +k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/test/loadtest/internal/cluster/kind.go b/test/loadtest/internal/cluster/kind.go new file mode 100644 index 0000000..1fde314 --- /dev/null +++ 
b/test/loadtest/internal/cluster/kind.go @@ -0,0 +1,314 @@ +// Package cluster provides kind cluster management functionality. +package cluster + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "strings" + "time" +) + +// Config holds configuration for kind cluster operations. +type Config struct { + Name string + ContainerRuntime string // "docker" or "podman" + PortOffset int // Offset for host port mappings (for parallel clusters) +} + +// Manager handles kind cluster operations. +type Manager struct { + cfg Config +} + +// NewManager creates a new cluster manager. +func NewManager(cfg Config) *Manager { + return &Manager{cfg: cfg} +} + +// DetectContainerRuntime finds available container runtime. +// It checks if the runtime daemon is actually running, not just if the binary exists. +func DetectContainerRuntime() (string, error) { + if _, err := exec.LookPath("docker"); err == nil { + cmd := exec.Command("docker", "info") + if err := cmd.Run(); err == nil { + return "docker", nil + } + } + if _, err := exec.LookPath("podman"); err == nil { + cmd := exec.Command("podman", "info") + if err := cmd.Run(); err == nil { + return "podman", nil + } + } + return "", fmt.Errorf("neither docker nor podman is running") +} + +// Exists checks if the cluster already exists. +func (m *Manager) Exists() bool { + cmd := exec.Command("kind", "get", "clusters") + out, err := cmd.Output() + if err != nil { + return false + } + for _, line := range strings.Split(string(out), "\n") { + if strings.TrimSpace(line) == m.cfg.Name { + return true + } + } + return false +} + +// Delete deletes the kind cluster. +func (m *Manager) Delete(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", m.cfg.Name) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// Create creates a new kind cluster with optimized settings. 
+func (m *Manager) Create(ctx context.Context) error { + if m.cfg.ContainerRuntime == "podman" { + os.Setenv("KIND_EXPERIMENTAL_PROVIDER", "podman") + } + + if m.Exists() { + fmt.Printf("Cluster %s already exists, deleting...\n", m.cfg.Name) + if err := m.Delete(ctx); err != nil { + return fmt.Errorf("deleting existing cluster: %w", err) + } + } + + httpPort := 8080 + m.cfg.PortOffset + httpsPort := 8443 + m.cfg.PortOffset + + config := fmt.Sprintf(`kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +networking: + podSubnet: "10.244.0.0/16" + serviceSubnet: "10.96.0.0/16" +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" + - | + kind: ClusterConfiguration + apiServer: + extraArgs: + max-requests-inflight: "800" + max-mutating-requests-inflight: "400" + watch-cache-sizes: "configmaps#1000,secrets#1000,pods#1000" + controllerManager: + extraArgs: + kube-api-qps: "200" + kube-api-burst: "200" + scheduler: + extraArgs: + kube-api-qps: "200" + kube-api-burst: "200" + extraPortMappings: + - containerPort: 80 + hostPort: %d + protocol: TCP + - containerPort: 443 + hostPort: %d + protocol: TCP +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +`, httpPort, httpsPort) + cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", m.cfg.Name, "--config=-") + cmd.Stdin = strings.NewReader(config) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// GetKubeconfig returns the kubeconfig for the cluster. +func (m *Manager) GetKubeconfig() (string, error) { + cmd := exec.Command("kind", "get", "kubeconfig", "--name", m.cfg.Name) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("getting kubeconfig: %w", err) + } + return string(out), nil +} + +// Context returns the kubectl context name for this cluster. +func (m *Manager) Context() string { + return "kind-" + m.cfg.Name +} + +// Name returns the cluster name. 
+func (m *Manager) Name() string { + return m.cfg.Name +} + +// LoadImage loads a container image into the kind cluster. +func (m *Manager) LoadImage(ctx context.Context, image string) error { + if !m.imageExistsLocally(image) { + fmt.Printf(" Image not found locally, pulling: %s\n", image) + pullCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image) + pullCmd.Stdout = os.Stdout + pullCmd.Stderr = os.Stderr + if err := pullCmd.Run(); err != nil { + return fmt.Errorf("pulling image %s: %w", image, err) + } + } else { + fmt.Printf(" Image found locally: %s\n", image) + } + + fmt.Printf(" Copying image to kind cluster...\n") + + if m.cfg.ContainerRuntime == "podman" { + tmpFile := fmt.Sprintf("/tmp/kind-image-%d.tar", time.Now().UnixNano()) + defer os.Remove(tmpFile) + + saveCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "save", image, "-o", tmpFile) + if err := saveCmd.Run(); err != nil { + return fmt.Errorf("saving image %s: %w", image, err) + } + + loadCmd := exec.CommandContext(ctx, "kind", "load", "image-archive", tmpFile, "--name", m.cfg.Name) + loadCmd.Stdout = os.Stdout + loadCmd.Stderr = os.Stderr + if err := loadCmd.Run(); err != nil { + return fmt.Errorf("loading image archive: %w", err) + } + } else { + loadCmd := exec.CommandContext(ctx, "kind", "load", "docker-image", image, "--name", m.cfg.Name) + loadCmd.Stdout = os.Stdout + loadCmd.Stderr = os.Stderr + if err := loadCmd.Run(); err != nil { + return fmt.Errorf("loading image %s: %w", image, err) + } + } + + return nil +} + +// imageExistsLocally checks if an image exists in the local container runtime. +func (m *Manager) imageExistsLocally(image string) bool { + cmd := exec.Command(m.cfg.ContainerRuntime, "image", "exists", image) + if err := cmd.Run(); err == nil { + return true + } + + cmd = exec.Command(m.cfg.ContainerRuntime, "image", "inspect", image) + if err := cmd.Run(); err == nil { + return true + } + + cmd = exec.Command(m.cfg.ContainerRuntime, "images", "--format", "{{.Repository}}:{{.Tag}}") + out, err := cmd.Output() + if err == nil { + for _, line := range strings.Split(string(out), "\n") { + if strings.TrimSpace(line) == image { + return true + } + } + } + + return false +} + +// PullImage pulls an image using the container runtime. +func (m *Manager) PullImage(ctx context.Context, image string) error { + cmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// ExecKubectl runs a kubectl command against the cluster. +func (m *Manager) ExecKubectl(ctx context.Context, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, "kubectl", args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("%w: %s", err, stderr.String()) + } + return stdout.Bytes(), nil +} diff --git a/test/loadtest/internal/cmd/report.go b/test/loadtest/internal/cmd/report.go new file mode 100644 index 0000000..7bf4cc6 --- /dev/null +++ b/test/loadtest/internal/cmd/report.go @@ -0,0 +1,860 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "log" + "math" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/spf13/cobra" +) + +var ( + reportScenario string + reportResultsDir string + reportOutputFile string + reportFormat string +) + +var reportCmd = &cobra.Command{ + Use: "report", + Short: "Generate comparison report for a scenario", + Long: `Generate a detailed report for a specific test scenario. 
+ +Examples: + # Generate report for a scenario + loadtest report --scenario=S2 --results-dir=./results + + # Generate JSON report + loadtest report --scenario=S2 --format=json`, + Run: func(cmd *cobra.Command, args []string) { + reportCommand() + }, +} + +func init() { + reportCmd.Flags().StringVar(&reportScenario, "scenario", "", "Scenario to report on (required)") + reportCmd.Flags().StringVar(&reportResultsDir, "results-dir", "./results", "Directory containing results") + reportCmd.Flags().StringVar(&reportOutputFile, "output", "", "Output file (default: stdout)") + reportCmd.Flags().StringVar(&reportFormat, "format", "text", "Output format: text, json, markdown") + reportCmd.MarkFlagRequired("scenario") +} + +// PrometheusResponse represents a Prometheus API response for report parsing. +type PrometheusResponse struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric map[string]string `json:"metric"` + Value []interface{} `json:"value"` + } `json:"result"` + } `json:"data"` +} + +// MetricComparison represents the comparison of a single metric. +type MetricComparison struct { + Name string `json:"name"` + DisplayName string `json:"display_name"` + Unit string `json:"unit"` + IsCounter bool `json:"is_counter"` + OldValue float64 `json:"old_value"` + NewValue float64 `json:"new_value"` + Expected float64 `json:"expected"` + Difference float64 `json:"difference"` + DiffPct float64 `json:"diff_pct"` + Status string `json:"status"` + Threshold float64 `json:"threshold"` + OldMeetsExpected string `json:"old_meets_expected"` + NewMeetsExpected string `json:"new_meets_expected"` +} + +type metricInfo struct { + unit string + isCounter bool +} + +var metricInfoMap = map[string]metricInfo{ + "reconcile_total": {unit: "count", isCounter: true}, + "reconcile_duration_p50": {unit: "s", isCounter: false}, + "reconcile_duration_p95": {unit: "s", isCounter: false}, + "reconcile_duration_p99": {unit: "s", isCounter: false}, + "action_total": {unit: "count", isCounter: true}, + "action_latency_p50": {unit: "s", isCounter: false}, + "action_latency_p95": {unit: "s", isCounter: false}, + "action_latency_p99": {unit: "s", isCounter: false}, + "errors_total": {unit: "count", isCounter: true}, + "reload_executed_total": {unit: "count", isCounter: true}, + "workloads_scanned_total": {unit: "count", isCounter: true}, + "workloads_matched_total": {unit: "count", isCounter: true}, + "skipped_total_no_data_change": {unit: "count", isCounter: true}, + "rest_client_requests_total": {unit: "count", isCounter: true}, + "rest_client_requests_get": {unit: "count", isCounter: true}, + "rest_client_requests_patch": {unit: "count", isCounter: true}, + "rest_client_requests_put": {unit: "count", isCounter: true}, + "rest_client_requests_errors": {unit: "count", isCounter: true}, + "memory_rss_mb_avg": {unit: "MB", isCounter: false}, + "memory_rss_mb_max": {unit: "MB", isCounter: false}, + "memory_heap_mb_avg": {unit: "MB", isCounter: false}, + "memory_heap_mb_max": {unit: "MB", isCounter: false}, + "cpu_cores_avg": {unit: "cores", isCounter: false}, + "cpu_cores_max": {unit: "cores", isCounter: false}, + "goroutines_avg": {unit: "count", isCounter: false}, + "goroutines_max": {unit: "count", isCounter: false}, + "gc_pause_p99_ms": {unit: "ms", isCounter: false}, +} + +// ReportExpectedMetrics matches the expected metrics from test scenarios. 
+type ReportExpectedMetrics struct { + ActionTotal int `json:"action_total"` + ReloadExecutedTotal int `json:"reload_executed_total"` + ReconcileTotal int `json:"reconcile_total"` + WorkloadsScannedTotal int `json:"workloads_scanned_total"` + WorkloadsMatchedTotal int `json:"workloads_matched_total"` + SkippedTotal int `json:"skipped_total"` + Description string `json:"description"` +} + +// ScenarioReport represents the full report for a scenario. +type ScenarioReport struct { + Scenario string `json:"scenario"` + Timestamp time.Time `json:"timestamp"` + Comparisons []MetricComparison `json:"comparisons"` + OverallStatus string `json:"overall_status"` + Summary string `json:"summary"` + PassCriteria []string `json:"pass_criteria"` + FailedCriteria []string `json:"failed_criteria"` + Expected ReportExpectedMetrics `json:"expected"` + TestDescription string `json:"test_description"` +} + +// MetricType defines how to evaluate a metric. +type MetricType int + +const ( + LowerIsBetter MetricType = iota + ShouldMatch + HigherIsBetter + Informational +) + +type thresholdConfig struct { + maxDiff float64 + metricType MetricType + minAbsDiff float64 +} + +var thresholds = map[string]thresholdConfig{ + "reconcile_total": {maxDiff: 60.0, metricType: LowerIsBetter}, + "reconcile_duration_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5}, + "reconcile_duration_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "reconcile_duration_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "action_latency_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5}, + "action_latency_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "action_latency_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "errors_total": {maxDiff: 0.0, metricType: LowerIsBetter}, + "action_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "reload_executed_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "workloads_scanned_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "workloads_matched_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "skipped_total_no_data_change": {maxDiff: 20.0, metricType: ShouldMatch}, + "rest_client_requests_total": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_get": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_patch": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_put": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "rest_client_requests_errors": {maxDiff: 0.0, metricType: LowerIsBetter, minAbsDiff: 100}, + "memory_rss_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "memory_rss_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 30}, + "memory_heap_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 15}, + "memory_heap_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "cpu_cores_avg": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.1}, + "cpu_cores_max": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.2}, + "goroutines_avg": {metricType: Informational}, + "goroutines_max": {metricType: Informational}, + "gc_pause_p99_ms": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 5}, +} + +func reportCommand() { + if reportScenario == "" { + log.Fatal("--scenario is required for report command") + } + + report, err := generateScenarioReport(reportScenario, reportResultsDir) + if err != 
nil { + log.Fatalf("Failed to generate report: %v", err) + } + + var output string + switch OutputFormat(reportFormat) { + case OutputFormatJSON: + output = renderScenarioReportJSON(report) + case OutputFormatMarkdown: + output = renderScenarioReportMarkdown(report) + default: + output = renderScenarioReport(report) + } + + if reportOutputFile != "" { + if err := os.WriteFile(reportOutputFile, []byte(output), 0644); err != nil { + log.Fatalf("Failed to write output file: %v", err) + } + log.Printf("Report written to %s", reportOutputFile) + } else { + fmt.Println(output) + } +} + +func generateScenarioReport(scenario, resultsDir string) (*ScenarioReport, error) { + oldDir := filepath.Join(resultsDir, scenario, "old") + newDir := filepath.Join(resultsDir, scenario, "new") + scenarioDir := filepath.Join(resultsDir, scenario) + + _, oldErr := os.Stat(oldDir) + _, newErr := os.Stat(newDir) + hasOld := oldErr == nil + hasNew := newErr == nil + isComparison := hasOld && hasNew + + singleVersion := "" + singleDir := "" + if !isComparison { + if hasNew { + singleVersion = "new" + singleDir = newDir + } else if hasOld { + singleVersion = "old" + singleDir = oldDir + } else { + return nil, fmt.Errorf("no results found in %s", scenarioDir) + } + } + + report := &ScenarioReport{ + Scenario: scenario, + Timestamp: time.Now(), + } + + expectedPath := filepath.Join(scenarioDir, "expected.json") + if data, err := os.ReadFile(expectedPath); err == nil { + if err := json.Unmarshal(data, &report.Expected); err != nil { + log.Printf("Warning: Could not parse expected metrics: %v", err) + } else { + report.TestDescription = report.Expected.Description + } + } + + if !isComparison { + return generateSingleVersionReport(report, singleDir, singleVersion, scenario) + } + + metricsToCompare := []struct { + name string + file string + selector func(data PrometheusResponse) float64 + }{ + {"reconcile_total", "reloader_reconcile_total.json", sumAllValues}, + {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue}, + {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue}, + {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue}, + {"action_total", "reloader_action_total.json", sumAllValues}, + {"action_latency_p50", "action_p50.json", getFirstValue}, + {"action_latency_p95", "action_p95.json", getFirstValue}, + {"action_latency_p99", "action_p99.json", getFirstValue}, + {"errors_total", "reloader_errors_total.json", sumAllValues}, + {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues}, + {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues}, + {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues}, + {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue}, + {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue}, + {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue}, + {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue}, + {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue}, + {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB}, + {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB}, + {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB}, + {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB}, + {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue}, + {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue}, + 
{"goroutines_avg", "goroutines_avg.json", getFirstValue}, + {"goroutines_max", "goroutines_max.json", getFirstValue}, + {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs}, + } + + expectedValues := map[string]float64{ + "action_total": float64(report.Expected.ActionTotal), + "reload_executed_total": float64(report.Expected.ReloadExecutedTotal), + "reconcile_total": float64(report.Expected.ReconcileTotal), + "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal), + "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal), + "skipped_total": float64(report.Expected.SkippedTotal), + } + + metricValues := make(map[string]struct{ old, new, expected float64 }) + + for _, m := range metricsToCompare { + oldData, err := loadMetricFile(filepath.Join(oldDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load old metric %s: %v", m.name, err) + continue + } + + newData, err := loadMetricFile(filepath.Join(newDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load new metric %s: %v", m.name, err) + continue + } + + oldValue := m.selector(oldData) + newValue := m.selector(newData) + expected := expectedValues[m.name] + + metricValues[m.name] = struct{ old, new, expected float64 }{oldValue, newValue, expected} + } + + newMeetsActionExpected := false + newReconcileIsZero := false + isChurnScenario := scenario == "S5" + if v, ok := metricValues["action_total"]; ok && v.expected > 0 { + tolerance := v.expected * 0.15 + newMeetsActionExpected = math.Abs(v.new-v.expected) <= tolerance + } + if v, ok := metricValues["reconcile_total"]; ok { + newReconcileIsZero = v.new == 0 + } + + for _, m := range metricsToCompare { + v, ok := metricValues[m.name] + if !ok { + continue + } + + comparison := compareMetricWithExpected(m.name, v.old, v.new, v.expected) + + if strings.HasPrefix(m.name, "rest_client_requests") { + if newMeetsActionExpected && comparison.Status != "pass" { + if oldMeets, ok := metricValues["action_total"]; ok { + oldTolerance := oldMeets.expected * 0.15 + oldMissed := math.Abs(oldMeets.old-oldMeets.expected) > oldTolerance + if oldMissed { + comparison.Status = "pass" + } + } + } + if newReconcileIsZero && comparison.Status != "pass" { + comparison.Status = "pass" + } + } + + if isChurnScenario { + if m.name == "errors_total" { + if v.new < 50 && v.old < 50 { + comparison.Status = "pass" + } else if v.new <= v.old*1.5 { + comparison.Status = "pass" + } + } + if m.name == "action_total" || m.name == "reload_executed_total" { + if v.old > 0 { + diff := math.Abs(v.new-v.old) / v.old * 100 + if diff <= 20 { + comparison.Status = "pass" + } + } else if v.new > 0 { + comparison.Status = "pass" + } + } + } + + report.Comparisons = append(report.Comparisons, comparison) + + if comparison.Status == "pass" { + report.PassCriteria = append(report.PassCriteria, m.name) + } else if comparison.Status == "fail" { + report.FailedCriteria = append(report.FailedCriteria, m.name) + } + } + + if len(report.FailedCriteria) == 0 { + report.OverallStatus = "PASS" + report.Summary = "All metrics within acceptable thresholds" + } else { + report.OverallStatus = "FAIL" + report.Summary = fmt.Sprintf("%d metrics failed: %s", + len(report.FailedCriteria), + strings.Join(report.FailedCriteria, ", ")) + } + + return report, nil +} + +func generateSingleVersionReport(report *ScenarioReport, dataDir, version, scenario string) (*ScenarioReport, error) { + metricsToCollect := []struct { + name string + file string + selector func(data PrometheusResponse) 
float64 + }{ + {"reconcile_total", "reloader_reconcile_total.json", sumAllValues}, + {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue}, + {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue}, + {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue}, + {"action_total", "reloader_action_total.json", sumAllValues}, + {"action_latency_p50", "action_p50.json", getFirstValue}, + {"action_latency_p95", "action_p95.json", getFirstValue}, + {"action_latency_p99", "action_p99.json", getFirstValue}, + {"errors_total", "reloader_errors_total.json", sumAllValues}, + {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues}, + {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues}, + {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues}, + {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue}, + {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue}, + {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue}, + {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue}, + {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue}, + {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB}, + {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB}, + {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB}, + {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB}, + {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue}, + {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue}, + {"goroutines_avg", "goroutines_avg.json", getFirstValue}, + {"goroutines_max", "goroutines_max.json", getFirstValue}, + {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs}, + } + + expectedValues := map[string]float64{ + "action_total": float64(report.Expected.ActionTotal), + "reload_executed_total": float64(report.Expected.ReloadExecutedTotal), + "reconcile_total": float64(report.Expected.ReconcileTotal), + "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal), + "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal), + "skipped_total": float64(report.Expected.SkippedTotal), + } + + for _, m := range metricsToCollect { + data, err := loadMetricFile(filepath.Join(dataDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load metric %s: %v", m.name, err) + continue + } + + value := m.selector(data) + expected := expectedValues[m.name] + + info := metricInfoMap[m.name] + if info.unit == "" { + info = metricInfo{unit: "count", isCounter: true} + } + + displayName := m.name + if info.unit != "count" { + displayName = fmt.Sprintf("%s (%s)", m.name, info.unit) + } + + status := "info" + meetsExp := "-" + + if expected > 0 { + meetsExp = meetsExpected(value, expected) + threshold, ok := thresholds[m.name] + if ok && threshold.metricType == ShouldMatch { + if meetsExp == "✓" { + status = "pass" + report.PassCriteria = append(report.PassCriteria, m.name) + } else { + status = "fail" + report.FailedCriteria = append(report.FailedCriteria, m.name) + } + } + } + + if info.isCounter { + value = math.Round(value) + } + + report.Comparisons = append(report.Comparisons, MetricComparison{ + Name: m.name, + DisplayName: displayName, + Unit: info.unit, + IsCounter: info.isCounter, + OldValue: 0, + NewValue: value, + Expected: expected, + OldMeetsExpected: "-", + NewMeetsExpected: meetsExp, + Status: status, + }) + } + + 
if len(report.FailedCriteria) == 0 { + report.OverallStatus = "PASS" + report.Summary = fmt.Sprintf("Single-version test (%s) completed successfully", version) + } else { + report.OverallStatus = "FAIL" + report.Summary = fmt.Sprintf("%d metrics failed: %s", + len(report.FailedCriteria), + strings.Join(report.FailedCriteria, ", ")) + } + + return report, nil +} + +func loadMetricFile(path string) (PrometheusResponse, error) { + var resp PrometheusResponse + data, err := os.ReadFile(path) + if err != nil { + return resp, err + } + err = json.Unmarshal(data, &resp) + return resp, err +} + +func sumAllValues(data PrometheusResponse) float64 { + var sum float64 + for _, result := range data.Data.Result { + if len(result.Value) >= 2 { + if v, ok := result.Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + sum += f + } + } + } + return sum +} + +func sumSuccessValues(data PrometheusResponse) float64 { + var sum float64 + for _, result := range data.Data.Result { + if result.Metric["success"] == "true" { + if len(result.Value) >= 2 { + if v, ok := result.Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + sum += f + } + } + } + } + return sum +} + +func getFirstValue(data PrometheusResponse) float64 { + if len(data.Data.Result) > 0 && len(data.Data.Result[0].Value) >= 2 { + if v, ok := data.Data.Result[0].Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + return f + } + } + return 0 +} + +func bytesToMB(data PrometheusResponse) float64 { + bytes := getFirstValue(data) + return bytes / (1024 * 1024) +} + +func secondsToMs(data PrometheusResponse) float64 { + seconds := getFirstValue(data) + return seconds * 1000 +} + +func meetsExpected(value, expected float64) string { + if expected == 0 { + return "-" + } + tolerance := expected * 0.15 + if math.Abs(value-expected) <= tolerance { + return "✓" + } + return "✗" +} + +func compareMetricWithExpected(name string, oldValue, newValue, expected float64) MetricComparison { + diff := newValue - oldValue + absDiff := math.Abs(diff) + var diffPct float64 + if oldValue != 0 { + diffPct = (diff / oldValue) * 100 + } else if newValue != 0 { + diffPct = 100 + } + + threshold, ok := thresholds[name] + if !ok { + threshold = thresholdConfig{maxDiff: 10.0, metricType: ShouldMatch} + } + + info := metricInfoMap[name] + if info.unit == "" { + info = metricInfo{unit: "count", isCounter: true} + } + displayName := name + if info.unit != "count" { + displayName = fmt.Sprintf("%s (%s)", name, info.unit) + } + + if info.isCounter { + oldValue = math.Round(oldValue) + newValue = math.Round(newValue) + } + + status := "pass" + oldMeetsExp := meetsExpected(oldValue, expected) + newMeetsExp := meetsExpected(newValue, expected) + + isNewMetric := info.isCounter && oldValue == 0 && newValue > 0 && expected == 0 + + if isNewMetric { + status = "info" + } else if expected > 0 && threshold.metricType == ShouldMatch { + if newMeetsExp == "✗" { + status = "fail" + } + } else { + switch threshold.metricType { + case LowerIsBetter: + if threshold.minAbsDiff > 0 && absDiff < threshold.minAbsDiff { + status = "pass" + } else if diffPct > threshold.maxDiff { + status = "fail" + } + case HigherIsBetter: + if diffPct < -threshold.maxDiff { + status = "fail" + } + case ShouldMatch: + if math.Abs(diffPct) > threshold.maxDiff { + status = "fail" + } + case Informational: + status = "info" + } + } + + return MetricComparison{ + Name: name, + DisplayName: displayName, + Unit: info.unit, + IsCounter: info.isCounter, + Expected: expected, + 
OldMeetsExpected: oldMeetsExp, + NewMeetsExpected: newMeetsExp, + OldValue: oldValue, + NewValue: newValue, + Difference: diff, + DiffPct: diffPct, + Status: status, + Threshold: threshold.maxDiff, + } +} + +func renderScenarioReport(report *ScenarioReport) string { + var sb strings.Builder + + isSingleVersion := true + for _, c := range report.Comparisons { + if c.OldValue != 0 { + isSingleVersion = false + break + } + } + + sb.WriteString("\n") + sb.WriteString("================================================================================\n") + if isSingleVersion { + sb.WriteString(" RELOADER TEST REPORT\n") + } else { + sb.WriteString(" RELOADER A/B COMPARISON REPORT\n") + } + sb.WriteString("================================================================================\n\n") + + fmt.Fprintf(&sb, "Scenario: %s\n", report.Scenario) + fmt.Fprintf(&sb, "Generated: %s\n", report.Timestamp.Format("2006-01-02 15:04:05")) + fmt.Fprintf(&sb, "Status: %s\n", report.OverallStatus) + fmt.Fprintf(&sb, "Summary: %s\n", report.Summary) + + if report.TestDescription != "" { + fmt.Fprintf(&sb, "Test: %s\n", report.TestDescription) + } + + if report.Expected.ActionTotal > 0 { + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" EXPECTED VALUES\n") + sb.WriteString("--------------------------------------------------------------------------------\n") + fmt.Fprintf(&sb, "Expected Action Total: %d\n", report.Expected.ActionTotal) + fmt.Fprintf(&sb, "Expected Reload Executed Total: %d\n", report.Expected.ReloadExecutedTotal) + if report.Expected.SkippedTotal > 0 { + fmt.Fprintf(&sb, "Expected Skipped Total: %d\n", report.Expected.SkippedTotal) + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + if isSingleVersion { + sb.WriteString(" METRICS\n") + } else { + sb.WriteString(" METRIC COMPARISONS\n") + } + sb.WriteString("--------------------------------------------------------------------------------\n") + + if isSingleVersion { + sb.WriteString("(✓ = meets expected value within 15%)\n\n") + fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n", + "Metric", "Value", "Expected", "Met?", "Status") + fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n", + "------", "-----", "--------", "----", "------") + + for _, c := range report.Comparisons { + if c.IsCounter { + if c.Expected > 0 { + fmt.Fprintf(&sb, "%-32s %12.0f %10.0f %5s %8s\n", + c.DisplayName, c.NewValue, c.Expected, + c.NewMeetsExpected, c.Status) + } else { + fmt.Fprintf(&sb, "%-32s %12.0f %10s %5s %8s\n", + c.DisplayName, c.NewValue, "-", + c.NewMeetsExpected, c.Status) + } + } else { + fmt.Fprintf(&sb, "%-32s %12.4f %10s %5s %8s\n", + c.DisplayName, c.NewValue, "-", + c.NewMeetsExpected, c.Status) + } + } + } else { + sb.WriteString("(Old✓/New✓ = meets expected value within 15%)\n\n") + + fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n", + "Metric", "Old", "New", "Expected", "Old✓", "New✓", "Status") + fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n", + "------", "---", "---", "--------", "----", "----", "------") + + for _, c := range report.Comparisons { + if c.IsCounter { + if c.Expected > 0 { + fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10.0f %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, c.Expected, + c.OldMeetsExpected, c.NewMeetsExpected, c.Status) + } else { + fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10s %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, "-", + c.OldMeetsExpected, c.NewMeetsExpected, 
c.Status) + } + } else { + fmt.Fprintf(&sb, "%-32s %12.4f %12.4f %10s %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, "-", + c.OldMeetsExpected, c.NewMeetsExpected, c.Status) + } + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" PASS/FAIL CRITERIA\n") + sb.WriteString("--------------------------------------------------------------------------------\n\n") + + fmt.Fprintf(&sb, "Passed (%d):\n", len(report.PassCriteria)) + for _, p := range report.PassCriteria { + fmt.Fprintf(&sb, " ✓ %s\n", p) + } + + if len(report.FailedCriteria) > 0 { + fmt.Fprintf(&sb, "\nFailed (%d):\n", len(report.FailedCriteria)) + for _, f := range report.FailedCriteria { + fmt.Fprintf(&sb, " ✗ %s\n", f) + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" THRESHOLDS USED\n") + sb.WriteString("--------------------------------------------------------------------------------\n\n") + + fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n", + "Metric", "Max Diff%", "Min Abs Diff", "Direction") + fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n", + "------", "---------", "------------", "---------") + + var names []string + for name := range thresholds { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + t := thresholds[name] + var direction string + switch t.metricType { + case LowerIsBetter: + direction = "lower is better" + case HigherIsBetter: + direction = "higher is better" + case ShouldMatch: + direction = "should match" + case Informational: + direction = "info only" + } + minAbsDiff := "-" + if t.minAbsDiff > 0 { + minAbsDiff = fmt.Sprintf("%.1f", t.minAbsDiff) + } + fmt.Fprintf(&sb, "%-35s %9.1f%% %15s %18s\n", + name, t.maxDiff, minAbsDiff, direction) + } + + sb.WriteString("\n================================================================================\n") + + return sb.String() +} + +func renderScenarioReportJSON(report *ScenarioReport) string { + data, err := json.MarshalIndent(report, "", " ") + if err != nil { + return fmt.Sprintf(`{"error": "%s"}`, err.Error()) + } + return string(data) +} + +func renderScenarioReportMarkdown(report *ScenarioReport) string { + var sb strings.Builder + + emoji := "✅" + if report.OverallStatus != "PASS" { + emoji = "❌" + } + + sb.WriteString(fmt.Sprintf("## %s %s: %s\n\n", emoji, report.Scenario, report.OverallStatus)) + + if report.TestDescription != "" { + sb.WriteString(fmt.Sprintf("> %s\n\n", report.TestDescription)) + } + + sb.WriteString("| Metric | Value | Expected | Status |\n") + sb.WriteString("|--------|------:|:--------:|:------:|\n") + + keyMetrics := []string{"action_total", "reload_executed_total", "errors_total", "reconcile_total"} + for _, name := range keyMetrics { + for _, c := range report.Comparisons { + if c.Name == name { + value := fmt.Sprintf("%.0f", c.NewValue) + expected := "-" + if c.Expected > 0 { + expected = fmt.Sprintf("%.0f", c.Expected) + } + status := "✅" + if c.Status == "fail" { + status = "❌" + } else if c.Status == "info" { + status = "ℹ️" + } + sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s |\n", c.DisplayName, value, expected, status)) + break + } + } + } + + return sb.String() +} diff --git a/test/loadtest/internal/cmd/root.go b/test/loadtest/internal/cmd/root.go new file mode 100644 index 0000000..46e9be5 --- /dev/null +++ b/test/loadtest/internal/cmd/root.go @@ -0,0 +1,43 @@ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" +) 
+ +const ( + // DefaultClusterName is the default kind cluster name. + DefaultClusterName = "reloader-loadtest" + // TestNamespace is the namespace used for test resources. + TestNamespace = "reloader-test" +) + +// OutputFormat defines the output format for reports. +type OutputFormat string + +const ( + OutputFormatText OutputFormat = "text" + OutputFormatJSON OutputFormat = "json" + OutputFormatMarkdown OutputFormat = "markdown" +) + +// rootCmd is the base command. +var rootCmd = &cobra.Command{ + Use: "loadtest", + Short: "Reloader Load Test CLI", + Long: `A CLI tool for running A/B comparison load tests on Reloader.`, +} + +func init() { + rootCmd.AddCommand(runCmd) + rootCmd.AddCommand(reportCmd) + rootCmd.AddCommand(summaryCmd) +} + +// Execute runs the root command. +func Execute() { + if err := rootCmd.Execute(); err != nil { + os.Exit(1) + } +} diff --git a/test/loadtest/internal/cmd/run.go b/test/loadtest/internal/cmd/run.go new file mode 100644 index 0000000..c78e579 --- /dev/null +++ b/test/loadtest/internal/cmd/run.go @@ -0,0 +1,648 @@ +package cmd + +import ( + "context" + "fmt" + "log" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/spf13/cobra" + "github.com/stakater/Reloader/test/loadtest/internal/cluster" + "github.com/stakater/Reloader/test/loadtest/internal/prometheus" + "github.com/stakater/Reloader/test/loadtest/internal/reloader" + "github.com/stakater/Reloader/test/loadtest/internal/scenarios" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +// RunConfig holds CLI configuration for the run command. +type RunConfig struct { + OldImage string + NewImage string + Scenario string + Duration int + SkipCluster bool + SkipImageLoad bool + ClusterName string + ResultsDir string + ManifestsDir string + Parallelism int +} + +// workerContext holds all resources for a single worker (cluster + prometheus). +type workerContext struct { + id int + clusterMgr *cluster.Manager + promMgr *prometheus.Manager + kubeClient kubernetes.Interface + kubeContext string + runtime string +} + +var runCfg RunConfig + +var runCmd = &cobra.Command{ + Use: "run", + Short: "Run A/B comparison tests", + Long: `Run load tests comparing old and new versions of Reloader. 
+ +Examples: + # Compare two images + loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=stakater/reloader:v1.1.0 + + # Run specific scenario + loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=localhost/reloader:dev --scenario=S2 + + # Test single image (no comparison) + loadtest run --new-image=localhost/reloader:test + + # Run all scenarios in parallel on 4 clusters + loadtest run --new-image=localhost/reloader:test --parallelism=4`, + Run: func(cmd *cobra.Command, args []string) { + runCommand() + }, +} + +func init() { + runCmd.Flags().StringVar(&runCfg.OldImage, "old-image", "", "Container image for \"old\" version (required for comparison)") + runCmd.Flags().StringVar(&runCfg.NewImage, "new-image", "", "Container image for \"new\" version (required for comparison)") + runCmd.Flags().StringVar(&runCfg.Scenario, "scenario", "all", "Test scenario: S1-S13 or \"all\"") + runCmd.Flags().IntVar(&runCfg.Duration, "duration", 60, "Test duration in seconds") + runCmd.Flags().IntVar(&runCfg.Parallelism, "parallelism", 1, "Run N scenarios in parallel on N clusters") + runCmd.Flags().BoolVar(&runCfg.SkipCluster, "skip-cluster", false, "Skip kind cluster creation (use existing)") + runCmd.Flags().BoolVar(&runCfg.SkipImageLoad, "skip-image-load", false, "Skip loading images into kind (use when images already loaded)") + runCmd.Flags().StringVar(&runCfg.ClusterName, "cluster-name", DefaultClusterName, "Kind cluster name") + runCmd.Flags().StringVar(&runCfg.ResultsDir, "results-dir", "./results", "Directory for results") + runCmd.Flags().StringVar(&runCfg.ManifestsDir, "manifests-dir", "", "Directory containing manifests (auto-detected if not set)") +} + +func runCommand() { + if runCfg.ManifestsDir == "" { + execPath, _ := os.Executable() + execDir := filepath.Dir(execPath) + runCfg.ManifestsDir = filepath.Join(execDir, "..", "..", "manifests") + if _, err := os.Stat(runCfg.ManifestsDir); os.IsNotExist(err) { + runCfg.ManifestsDir = "./manifests" + } + } + + if runCfg.Parallelism < 1 { + runCfg.Parallelism = 1 + } + + if runCfg.OldImage == "" && runCfg.NewImage == "" { + log.Fatal("At least one of --old-image or --new-image is required") + } + + runOld := runCfg.OldImage != "" + runNew := runCfg.NewImage != "" + runBoth := runOld && runNew + + log.Printf("Configuration:") + log.Printf(" Scenario: %s", runCfg.Scenario) + log.Printf(" Duration: %ds", runCfg.Duration) + log.Printf(" Parallelism: %d", runCfg.Parallelism) + if runCfg.OldImage != "" { + log.Printf(" Old image: %s", runCfg.OldImage) + } + if runCfg.NewImage != "" { + log.Printf(" New image: %s", runCfg.NewImage) + } + + runtime, err := cluster.DetectContainerRuntime() + if err != nil { + log.Fatalf("Failed to detect container runtime: %v", err) + } + log.Printf(" Container runtime: %s", runtime) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + log.Println("Received shutdown signal...") + cancel() + }() + + var scenariosToRun []string + if runCfg.Scenario == "all" { + scenariosToRun = []string{"S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S12", "S13"} + } else { + // Split comma-separated scenarios (e.g., "S1,S4,S6") + for _, s := range strings.Split(runCfg.Scenario, ",") { + if trimmed := strings.TrimSpace(s); trimmed != "" { + scenariosToRun = append(scenariosToRun, trimmed) + } + } + } + + if runCfg.SkipCluster && runCfg.Parallelism > 1 { + 
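+		// Each parallel worker provisions its own kind cluster (see setupWorker),
+		// so a single pre-existing cluster cannot be shared across workers.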
log.Fatal("--skip-cluster is not supported with --parallelism > 1") + } + + if runCfg.Parallelism > 1 { + runParallel(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth) + return + } + + runSequential(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth) +} + +func runSequential(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) { + clusterMgr := cluster.NewManager(cluster.Config{ + Name: cfg.ClusterName, + ContainerRuntime: runtime, + }) + + if cfg.SkipCluster { + log.Printf("Skipping cluster creation (using existing cluster: %s)", cfg.ClusterName) + if !clusterMgr.Exists() { + log.Fatalf("Cluster %s does not exist. Remove --skip-cluster to create it.", cfg.ClusterName) + } + } else { + log.Println("Creating kind cluster...") + if err := clusterMgr.Create(ctx); err != nil { + log.Fatalf("Failed to create cluster: %v", err) + } + } + + promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml") + promMgr := prometheus.NewManager(promManifest) + + log.Println("Installing Prometheus...") + if err := promMgr.Deploy(ctx); err != nil { + log.Fatalf("Failed to deploy Prometheus: %v", err) + } + + if err := promMgr.StartPortForward(ctx); err != nil { + log.Fatalf("Failed to start Prometheus port-forward: %v", err) + } + defer promMgr.StopPortForward() + + if cfg.SkipImageLoad { + log.Println("Skipping image loading (--skip-image-load)") + } else { + log.Println("Loading images into kind cluster...") + if runOld { + log.Printf("Loading old image: %s", cfg.OldImage) + if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil { + log.Fatalf("Failed to load old image: %v", err) + } + } + if runNew { + log.Printf("Loading new image: %s", cfg.NewImage) + if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil { + log.Fatalf("Failed to load new image: %v", err) + } + } + + log.Println("Pre-loading test images...") + testImage := "gcr.io/google-containers/busybox:1.27" + clusterMgr.LoadImage(ctx, testImage) + } + + kubeClient, err := getKubeClient("") + if err != nil { + log.Fatalf("Failed to create kubernetes client: %v", err) + } + + for _, scenarioID := range scenariosToRun { + log.Printf("========================================") + log.Printf("=== Starting scenario %s ===", scenarioID) + log.Printf("========================================") + + cleanupTestNamespaces(ctx, "") + reloader.CleanupByVersion(ctx, "old", "") + reloader.CleanupByVersion(ctx, "new", "") + + if err := promMgr.Reset(ctx); err != nil { + log.Printf("Warning: failed to reset Prometheus: %v", err) + } + + createTestNamespace(ctx, "") + + if runOld { + oldMgr := reloader.NewManager(reloader.Config{ + Version: "old", + Image: cfg.OldImage, + }) + + if err := oldMgr.Deploy(ctx); err != nil { + log.Printf("Failed to deploy old Reloader: %v", err) + continue + } + + if err := promMgr.WaitForTarget(ctx, oldMgr.Job(), 60*time.Second); err != nil { + log.Printf("Warning: %v", err) + log.Println("Proceeding anyway, but metrics may be incomplete") + } + + runScenario(ctx, kubeClient, scenarioID, "old", cfg.OldImage, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, promMgr, oldMgr.Job(), scenarioID, "old", cfg.ResultsDir) + collectLogs(ctx, oldMgr, scenarioID, "old", cfg.ResultsDir) + + if runBoth { + cleanupTestNamespaces(ctx, "") + oldMgr.Cleanup(ctx) + promMgr.Reset(ctx) + createTestNamespace(ctx, "") + } + } + + if runNew { + newMgr := reloader.NewManager(reloader.Config{ + Version: "new", + Image: cfg.NewImage, + }) + + if err := 
newMgr.Deploy(ctx); err != nil { + log.Printf("Failed to deploy new Reloader: %v", err) + continue + } + + if err := promMgr.WaitForTarget(ctx, newMgr.Job(), 60*time.Second); err != nil { + log.Printf("Warning: %v", err) + log.Println("Proceeding anyway, but metrics may be incomplete") + } + + runScenario(ctx, kubeClient, scenarioID, "new", cfg.NewImage, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, promMgr, newMgr.Job(), scenarioID, "new", cfg.ResultsDir) + collectLogs(ctx, newMgr, scenarioID, "new", cfg.ResultsDir) + } + + generateReport(scenarioID, cfg.ResultsDir, runBoth) + log.Printf("=== Scenario %s complete ===", scenarioID) + } + + log.Println("Load test complete!") + log.Printf("Results available in: %s", cfg.ResultsDir) +} + +func runParallel(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) { + numWorkers := cfg.Parallelism + if numWorkers > len(scenariosToRun) { + numWorkers = len(scenariosToRun) + log.Printf("Reducing parallelism to %d (number of scenarios)", numWorkers) + } + + log.Printf("Starting parallel execution with %d workers", numWorkers) + + workers := make([]*workerContext, numWorkers) + var setupWg sync.WaitGroup + setupErrors := make(chan error, numWorkers) + + log.Println("Setting up worker clusters...") + for i := range numWorkers { + setupWg.Add(1) + go func(workerID int) { + defer setupWg.Done() + worker, err := setupWorker(ctx, cfg, workerID, runtime, runOld, runNew) + if err != nil { + setupErrors <- fmt.Errorf("worker %d setup failed: %w", workerID, err) + return + } + workers[workerID] = worker + }(i) + } + + setupWg.Wait() + close(setupErrors) + + for err := range setupErrors { + log.Printf("Error: %v", err) + } + + readyWorkers := 0 + for _, w := range workers { + if w != nil { + readyWorkers++ + } + } + if readyWorkers == 0 { + log.Fatal("No workers ready, aborting") + } + if readyWorkers < numWorkers { + log.Printf("Warning: only %d/%d workers ready", readyWorkers, numWorkers) + } + + defer func() { + log.Println("Cleaning up worker clusters...") + for _, w := range workers { + if w != nil { + w.promMgr.StopPortForward() + } + } + }() + + scenarioCh := make(chan string, len(scenariosToRun)) + for _, s := range scenariosToRun { + scenarioCh <- s + } + close(scenarioCh) + + var resultsMu sync.Mutex + completedScenarios := make([]string, 0, len(scenariosToRun)) + + var wg sync.WaitGroup + for _, worker := range workers { + if worker == nil { + continue + } + wg.Add(1) + go func(w *workerContext) { + defer wg.Done() + for scenarioID := range scenarioCh { + select { + case <-ctx.Done(): + return + default: + } + + log.Printf("[Worker %d] Starting scenario %s", w.id, scenarioID) + + cleanupTestNamespaces(ctx, w.kubeContext) + reloader.CleanupByVersion(ctx, "old", w.kubeContext) + reloader.CleanupByVersion(ctx, "new", w.kubeContext) + + if err := w.promMgr.Reset(ctx); err != nil { + log.Printf("[Worker %d] Warning: failed to reset Prometheus: %v", w.id, err) + } + + createTestNamespace(ctx, w.kubeContext) + + if runOld { + runVersionOnWorker(ctx, w, cfg, scenarioID, "old", cfg.OldImage, runBoth) + } + + if runNew { + runVersionOnWorker(ctx, w, cfg, scenarioID, "new", cfg.NewImage, false) + } + + generateReport(scenarioID, cfg.ResultsDir, runBoth) + + resultsMu.Lock() + completedScenarios = append(completedScenarios, scenarioID) + resultsMu.Unlock() + + log.Printf("[Worker %d] Scenario %s complete", w.id, scenarioID) + } + }(worker) + } + + wg.Wait() + + log.Println("Load test complete!") + 
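+	// wg.Wait has returned, so no worker is still appending to completedScenarios
+	// and it can be read here without taking resultsMu.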
log.Printf("Completed %d/%d scenarios", len(completedScenarios), len(scenariosToRun)) + log.Printf("Results available in: %s", cfg.ResultsDir) +} + +func setupWorker(ctx context.Context, cfg RunConfig, workerID int, runtime string, runOld, runNew bool) (*workerContext, error) { + workerName := fmt.Sprintf("%s-%d", DefaultClusterName, workerID) + promPort := 9091 + workerID + + log.Printf("[Worker %d] Creating cluster %s (ports %d/%d)...", workerID, workerName, 8080+workerID, 8443+workerID) + + clusterMgr := cluster.NewManager(cluster.Config{ + Name: workerName, + ContainerRuntime: runtime, + PortOffset: workerID, + }) + + if err := clusterMgr.Create(ctx); err != nil { + return nil, fmt.Errorf("creating cluster: %w", err) + } + + kubeContext := clusterMgr.Context() + + promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml") + promMgr := prometheus.NewManagerWithPort(promManifest, promPort, kubeContext) + + log.Printf("[Worker %d] Installing Prometheus (port %d)...", workerID, promPort) + if err := promMgr.Deploy(ctx); err != nil { + return nil, fmt.Errorf("deploying prometheus: %w", err) + } + + if err := promMgr.StartPortForward(ctx); err != nil { + return nil, fmt.Errorf("starting prometheus port-forward: %w", err) + } + + if cfg.SkipImageLoad { + log.Printf("[Worker %d] Skipping image loading (--skip-image-load)", workerID) + } else { + log.Printf("[Worker %d] Loading images...", workerID) + if runOld { + if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil { + log.Printf("[Worker %d] Warning: failed to load old image: %v", workerID, err) + } + } + if runNew { + if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil { + log.Printf("[Worker %d] Warning: failed to load new image: %v", workerID, err) + } + } + + testImage := "gcr.io/google-containers/busybox:1.27" + clusterMgr.LoadImage(ctx, testImage) + } + + kubeClient, err := getKubeClient(kubeContext) + if err != nil { + return nil, fmt.Errorf("creating kubernetes client: %w", err) + } + + log.Printf("[Worker %d] Ready", workerID) + return &workerContext{ + id: workerID, + clusterMgr: clusterMgr, + promMgr: promMgr, + kubeClient: kubeClient, + kubeContext: kubeContext, + runtime: runtime, + }, nil +} + +func runVersionOnWorker(ctx context.Context, w *workerContext, cfg RunConfig, scenarioID, version, image string, cleanupAfter bool) { + mgr := reloader.NewManager(reloader.Config{ + Version: version, + Image: image, + }) + mgr.SetKubeContext(w.kubeContext) + + if err := mgr.Deploy(ctx); err != nil { + log.Printf("[Worker %d] Failed to deploy %s Reloader: %v", w.id, version, err) + return + } + + if err := w.promMgr.WaitForTarget(ctx, mgr.Job(), 60*time.Second); err != nil { + log.Printf("[Worker %d] Warning: %v", w.id, err) + log.Printf("[Worker %d] Proceeding anyway, but metrics may be incomplete", w.id) + } + + runScenario(ctx, w.kubeClient, scenarioID, version, image, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, w.promMgr, mgr.Job(), scenarioID, version, cfg.ResultsDir) + collectLogs(ctx, mgr, scenarioID, version, cfg.ResultsDir) + + if cleanupAfter { + cleanupTestNamespaces(ctx, w.kubeContext) + mgr.Cleanup(ctx) + w.promMgr.Reset(ctx) + createTestNamespace(ctx, w.kubeContext) + } +} + +func runScenario(ctx context.Context, client kubernetes.Interface, scenarioID, version, image string, duration int, resultsDir string) { + runner, ok := scenarios.Registry[scenarioID] + if !ok { + log.Printf("Unknown scenario: %s", scenarioID) + return + } + + if s6, ok := runner.(*scenarios.ControllerRestartScenario); 
ok { + s6.ReloaderVersion = version + } + + if s11, ok := runner.(*scenarios.AnnotationStrategyScenario); ok { + s11.Image = image + } + + log.Printf("Running scenario %s (%s): %s", scenarioID, version, runner.Description()) + + if ctx.Err() != nil { + log.Printf("WARNING: Parent context already done: %v", ctx.Err()) + } + + timeout := time.Duration(duration)*time.Second + 5*time.Minute + log.Printf("Creating scenario context with timeout: %v (duration=%ds)", timeout, duration) + + scenarioCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + expected, err := runner.Run(scenarioCtx, client, TestNamespace, time.Duration(duration)*time.Second) + if err != nil { + log.Printf("Scenario %s failed: %v", scenarioID, err) + } + + scenarios.WriteExpectedMetrics(scenarioID, resultsDir, expected) +} + +func collectMetrics(ctx context.Context, promMgr *prometheus.Manager, job, scenarioID, version, resultsDir string) { + log.Printf("Waiting 5s for Reloader to finish processing events...") + time.Sleep(5 * time.Second) + + log.Printf("Waiting 8s for Prometheus to scrape final metrics...") + time.Sleep(8 * time.Second) + + log.Printf("Collecting metrics for %s...", version) + outputDir := filepath.Join(resultsDir, scenarioID, version) + if err := promMgr.CollectMetrics(ctx, job, outputDir, scenarioID); err != nil { + log.Printf("Failed to collect metrics: %v", err) + } +} + +func collectLogs(ctx context.Context, mgr *reloader.Manager, scenarioID, version, resultsDir string) { + log.Printf("Collecting logs for %s...", version) + logPath := filepath.Join(resultsDir, scenarioID, version, "reloader.log") + if err := mgr.CollectLogs(ctx, logPath); err != nil { + log.Printf("Failed to collect logs: %v", err) + } +} + +func generateReport(scenarioID, resultsDir string, isComparison bool) { + if isComparison { + log.Println("Generating comparison report...") + } else { + log.Println("Generating single-version report...") + } + + reportPath := filepath.Join(resultsDir, scenarioID, "report.txt") + + cmd := exec.Command(os.Args[0], "report", + fmt.Sprintf("--scenario=%s", scenarioID), + fmt.Sprintf("--results-dir=%s", resultsDir), + fmt.Sprintf("--output=%s", reportPath)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Run() + + if data, err := os.ReadFile(reportPath); err == nil { + fmt.Println(string(data)) + } + + log.Printf("Report saved to: %s", reportPath) +} + +func getKubeClient(kubeContext string) (kubernetes.Interface, error) { + kubeconfig := os.Getenv("KUBECONFIG") + if kubeconfig == "" { + home, _ := os.UserHomeDir() + kubeconfig = filepath.Join(home, ".kube", "config") + } + + loadingRules := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig} + configOverrides := &clientcmd.ConfigOverrides{} + if kubeContext != "" { + configOverrides.CurrentContext = kubeContext + } + + kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + config, err := kubeConfig.ClientConfig() + if err != nil { + return nil, err + } + + return kubernetes.NewForConfig(config) +} + +func createTestNamespace(ctx context.Context, kubeContext string) { + args := []string{"create", "namespace", TestNamespace, "--dry-run=client", "-o", "yaml"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + cmd := exec.CommandContext(ctx, "kubectl", args...) + out, _ := cmd.Output() + + applyArgs := []string{"apply", "-f", "-"} + if kubeContext != "" { + applyArgs = append([]string{"--context", kubeContext}, applyArgs...) 
+ } + applyCmd := exec.CommandContext(ctx, "kubectl", applyArgs...) + applyCmd.Stdin = strings.NewReader(string(out)) + applyCmd.Run() +} + +func cleanupTestNamespaces(ctx context.Context, kubeContext string) { + log.Println("Cleaning up test resources...") + + namespaces := []string{TestNamespace} + for i := range 10 { + namespaces = append(namespaces, fmt.Sprintf("%s-%d", TestNamespace, i)) + } + + for _, ns := range namespaces { + args := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + exec.CommandContext(ctx, "kubectl", args...).Run() + } + + time.Sleep(2 * time.Second) + + for _, ns := range namespaces { + args := []string{"delete", "pods", "--all", "-n", ns, "--grace-period=0", "--force"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + exec.CommandContext(ctx, "kubectl", args...).Run() + } +} + diff --git a/test/loadtest/internal/cmd/summary.go b/test/loadtest/internal/cmd/summary.go new file mode 100644 index 0000000..bda40fb --- /dev/null +++ b/test/loadtest/internal/cmd/summary.go @@ -0,0 +1,251 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "log" + "os" + "sort" + "strings" + "time" + + "github.com/spf13/cobra" +) + +var ( + summaryResultsDir string + summaryOutputFile string + summaryFormat string + summaryTestType string +) + +var summaryCmd = &cobra.Command{ + Use: "summary", + Short: "Generate summary across all scenarios (for CI)", + Long: `Generate an aggregated summary report across all test scenarios. + +Examples: + # Generate markdown summary for CI + loadtest summary --results-dir=./results --format=markdown`, + Run: func(cmd *cobra.Command, args []string) { + summaryCommand() + }, +} + +func init() { + summaryCmd.Flags().StringVar(&summaryResultsDir, "results-dir", "./results", "Directory containing results") + summaryCmd.Flags().StringVar(&summaryOutputFile, "output", "", "Output file (default: stdout)") + summaryCmd.Flags().StringVar(&summaryFormat, "format", "markdown", "Output format: text, json, markdown") + summaryCmd.Flags().StringVar(&summaryTestType, "test-type", "full", "Test type label: quick, full") +} + +// SummaryReport aggregates results from multiple scenarios. +type SummaryReport struct { + Timestamp time.Time `json:"timestamp"` + TestType string `json:"test_type"` + PassCount int `json:"pass_count"` + FailCount int `json:"fail_count"` + TotalCount int `json:"total_count"` + Scenarios []ScenarioSummary `json:"scenarios"` +} + +// ScenarioSummary provides a brief summary of a single scenario. 
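+// ActionTotal/ActionExp and ErrorsTotal are copied from the scenario report's
+// "action_total" and "errors_total" comparisons when the summary is built.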
+type ScenarioSummary struct { + ID string `json:"id"` + Status string `json:"status"` + Description string `json:"description"` + ActionTotal float64 `json:"action_total"` + ActionExp float64 `json:"action_expected"` + ErrorsTotal float64 `json:"errors_total"` +} + +func summaryCommand() { + summary, err := generateSummaryReport(summaryResultsDir, summaryTestType) + if err != nil { + log.Fatalf("Failed to generate summary: %v", err) + } + + var output string + switch OutputFormat(summaryFormat) { + case OutputFormatJSON: + output = renderSummaryJSON(summary) + case OutputFormatText: + output = renderSummaryText(summary) + default: + output = renderSummaryMarkdown(summary) + } + + if summaryOutputFile != "" { + if err := os.WriteFile(summaryOutputFile, []byte(output), 0644); err != nil { + log.Fatalf("Failed to write output file: %v", err) + } + log.Printf("Summary written to %s", summaryOutputFile) + } else { + fmt.Print(output) + } + + if summary.FailCount > 0 { + os.Exit(1) + } +} + +func generateSummaryReport(resultsDir, testType string) (*SummaryReport, error) { + summary := &SummaryReport{ + Timestamp: time.Now(), + TestType: testType, + } + + entries, err := os.ReadDir(resultsDir) + if err != nil { + return nil, fmt.Errorf("failed to read results directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() || !strings.HasPrefix(entry.Name(), "S") { + continue + } + + scenarioID := entry.Name() + report, err := generateScenarioReport(scenarioID, resultsDir) + if err != nil { + log.Printf("Warning: failed to load scenario %s: %v", scenarioID, err) + continue + } + + scenarioSummary := ScenarioSummary{ + ID: scenarioID, + Status: report.OverallStatus, + Description: report.TestDescription, + } + + for _, c := range report.Comparisons { + switch c.Name { + case "action_total": + scenarioSummary.ActionTotal = c.NewValue + scenarioSummary.ActionExp = c.Expected + case "errors_total": + scenarioSummary.ErrorsTotal = c.NewValue + } + } + + summary.Scenarios = append(summary.Scenarios, scenarioSummary) + summary.TotalCount++ + if report.OverallStatus == "PASS" { + summary.PassCount++ + } else { + summary.FailCount++ + } + } + + sort.Slice(summary.Scenarios, func(i, j int) bool { + return naturalSort(summary.Scenarios[i].ID, summary.Scenarios[j].ID) + }) + + return summary, nil +} + +func naturalSort(a, b string) bool { + var aNum, bNum int + fmt.Sscanf(a, "S%d", &aNum) + fmt.Sscanf(b, "S%d", &bNum) + return aNum < bNum +} + +func renderSummaryJSON(summary *SummaryReport) string { + data, err := json.MarshalIndent(summary, "", " ") + if err != nil { + return fmt.Sprintf(`{"error": "%s"}`, err.Error()) + } + return string(data) +} + +func renderSummaryText(summary *SummaryReport) string { + var sb strings.Builder + + sb.WriteString("================================================================================\n") + sb.WriteString(" LOAD TEST SUMMARY\n") + sb.WriteString("================================================================================\n\n") + + passRate := 0 + if summary.TotalCount > 0 { + passRate = summary.PassCount * 100 / summary.TotalCount + } + + fmt.Fprintf(&sb, "Test Type: %s\n", summary.TestType) + fmt.Fprintf(&sb, "Results: %d/%d passed (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate) + + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "ID", "Status", "Description", "Actions", "Errors") + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "------", "--------", strings.Repeat("-", 45), "----------", "--------") + + for _, s := range 
summary.Scenarios { + desc := s.Description + if len(desc) > 45 { + desc = desc[:42] + "..." + } + actions := fmt.Sprintf("%.0f", s.ActionTotal) + if s.ActionExp > 0 { + actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp) + } + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8.0f\n", s.ID, s.Status, desc, actions, s.ErrorsTotal) + } + + sb.WriteString("\n================================================================================\n") + return sb.String() +} + +func renderSummaryMarkdown(summary *SummaryReport) string { + var sb strings.Builder + + emoji := "✅" + title := "ALL TESTS PASSED" + if summary.FailCount > 0 { + emoji = "❌" + title = fmt.Sprintf("%d TEST(S) FAILED", summary.FailCount) + } else if summary.TotalCount == 0 { + emoji = "⚠️" + title = "NO RESULTS" + } + + sb.WriteString(fmt.Sprintf("## %s Load Test Results: %s\n\n", emoji, title)) + + if summary.TestType == "quick" { + sb.WriteString("> 🚀 **Quick Test** (S1, S4, S6) — Use `/loadtest` for full suite\n\n") + } + + passRate := 0 + if summary.TotalCount > 0 { + passRate = summary.PassCount * 100 / summary.TotalCount + } + sb.WriteString(fmt.Sprintf("**%d/%d passed** (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate)) + + sb.WriteString("| | Scenario | Description | Actions | Errors |\n") + sb.WriteString("|:-:|:--------:|-------------|:-------:|:------:|\n") + + for _, s := range summary.Scenarios { + icon := "✅" + if s.Status != "PASS" { + icon = "❌" + } + + desc := s.Description + if len(desc) > 45 { + desc = desc[:42] + "..." + } + + actions := fmt.Sprintf("%.0f", s.ActionTotal) + if s.ActionExp > 0 { + actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp) + } + + errors := fmt.Sprintf("%.0f", s.ErrorsTotal) + if s.ErrorsTotal > 0 { + errors = fmt.Sprintf("⚠️ %.0f", s.ErrorsTotal) + } + + sb.WriteString(fmt.Sprintf("| %s | **%s** | %s | %s | %s |\n", icon, s.ID, desc, actions, errors)) + } + + sb.WriteString("\n📦 **[Download detailed results](../artifacts)**\n") + + return sb.String() +} diff --git a/test/loadtest/internal/prometheus/prometheus.go b/test/loadtest/internal/prometheus/prometheus.go new file mode 100644 index 0000000..b9bf755 --- /dev/null +++ b/test/loadtest/internal/prometheus/prometheus.go @@ -0,0 +1,429 @@ +// Package prometheus provides Prometheus deployment and querying functionality. +package prometheus + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// Manager handles Prometheus operations. +type Manager struct { + manifestPath string + portForward *exec.Cmd + localPort int + kubeContext string +} + +// NewManager creates a new Prometheus manager. +func NewManager(manifestPath string) *Manager { + return &Manager{ + manifestPath: manifestPath, + localPort: 9091, + } +} + +// NewManagerWithPort creates a Prometheus manager with a custom port. +func NewManagerWithPort(manifestPath string, port int, kubeContext string) *Manager { + return &Manager{ + manifestPath: manifestPath, + localPort: port, + kubeContext: kubeContext, + } +} + +// kubectl returns kubectl args with optional context +func (m *Manager) kubectl(args ...string) []string { + if m.kubeContext != "" { + return append([]string{"--context", m.kubeContext}, args...) + } + return args +} + +// Deploy deploys Prometheus to the cluster. 
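+// It creates the monitoring namespace idempotently (client-side dry-run piped into
+// kubectl apply), applies the bundled manifest, and waits up to 120s for the
+// Prometheus pod to become ready.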
+func (m *Manager) Deploy(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("create", "namespace", "monitoring", "--dry-run=client", "-o", "yaml")...) + out, err := cmd.Output() + if err != nil { + return fmt.Errorf("generating namespace yaml: %w", err) + } + + applyCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", "-")...) + applyCmd.Stdin = strings.NewReader(string(out)) + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying namespace: %w", err) + } + + applyCmd = exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", m.manifestPath)...) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying prometheus manifest: %w", err) + } + + fmt.Println("Waiting for Prometheus to be ready...") + waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod", + "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...) + waitCmd.Stdout = os.Stdout + waitCmd.Stderr = os.Stderr + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for prometheus: %w", err) + } + + return nil +} + +// StartPortForward starts port-forwarding to Prometheus. +func (m *Manager) StartPortForward(ctx context.Context) error { + m.StopPortForward() + + m.portForward = exec.CommandContext(ctx, "kubectl", m.kubectl("port-forward", + "-n", "monitoring", "svc/prometheus", fmt.Sprintf("%d:9090", m.localPort))...) + + if err := m.portForward.Start(); err != nil { + return fmt.Errorf("starting port-forward: %w", err) + } + + for i := 0; i < 30; i++ { + time.Sleep(time.Second) + if m.isAccessible() { + fmt.Printf("Prometheus accessible at http://localhost:%d\n", m.localPort) + return nil + } + } + + return fmt.Errorf("prometheus port-forward not ready after 30s") +} + +// StopPortForward stops the port-forward process. +func (m *Manager) StopPortForward() { + if m.portForward != nil && m.portForward.Process != nil { + m.portForward.Process.Kill() + m.portForward = nil + } + exec.Command("pkill", "-f", fmt.Sprintf("kubectl port-forward.*prometheus.*%d", m.localPort)).Run() +} + +// Reset restarts Prometheus to clear all metrics. +func (m *Manager) Reset(ctx context.Context) error { + m.StopPortForward() + + cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("delete", "pod", "-n", "monitoring", + "-l", "app=prometheus", "--grace-period=0", "--force")...) + cmd.Run() + + fmt.Println("Waiting for Prometheus to restart...") + waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod", + "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...) + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for prometheus restart: %w", err) + } + + if err := m.StartPortForward(ctx); err != nil { + return err + } + + fmt.Println("Waiting 5s for Prometheus to initialize scraping...") + time.Sleep(5 * time.Second) + + return nil +} + +func (m *Manager) isAccessible() bool { + conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", m.localPort), 2*time.Second) + if err != nil { + return false + } + conn.Close() + + resp, err := http.Get(fmt.Sprintf("http://localhost:%d/api/v1/status/config", m.localPort)) + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == 200 +} + +// URL returns the local Prometheus URL. 
+func (m *Manager) URL() string { + return fmt.Sprintf("http://localhost:%d", m.localPort) +} + +// WaitForTarget waits for a specific job to be scraped by Prometheus. +func (m *Manager) WaitForTarget(ctx context.Context, job string, timeout time.Duration) error { + fmt.Printf("Waiting for Prometheus to discover and scrape job '%s'...\n", job) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if m.isTargetHealthy(job) { + fmt.Printf("Prometheus is scraping job '%s'\n", job) + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(2 * time.Second): + } + } + + m.printTargetStatus(job) + return fmt.Errorf("timeout waiting for Prometheus to scrape job '%s'", job) +} + +// isTargetHealthy checks if a job has at least one healthy target. +func (m *Manager) isTargetHealthy(job string) bool { + resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL())) + if err != nil { + return false + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + + var result struct { + Status string `json:"status"` + Data struct { + ActiveTargets []struct { + Labels map[string]string `json:"labels"` + Health string `json:"health"` + } `json:"activeTargets"` + } `json:"data"` + } + + if err := json.Unmarshal(body, &result); err != nil { + return false + } + + for _, target := range result.Data.ActiveTargets { + if target.Labels["job"] == job && target.Health == "up" { + return true + } + } + return false +} + +// printTargetStatus prints debug info about targets. +func (m *Manager) printTargetStatus(job string) { + resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL())) + if err != nil { + fmt.Printf("Failed to get targets: %v\n", err) + return + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + + var result struct { + Data struct { + ActiveTargets []struct { + Labels map[string]string `json:"labels"` + Health string `json:"health"` + LastError string `json:"lastError"` + ScrapeURL string `json:"scrapeUrl"` + } `json:"activeTargets"` + } `json:"data"` + } + + if err := json.Unmarshal(body, &result); err != nil { + fmt.Printf("Failed to parse targets: %v\n", err) + return + } + + fmt.Printf("Prometheus targets for job '%s':\n", job) + found := false + for _, target := range result.Data.ActiveTargets { + if target.Labels["job"] == job { + found = true + fmt.Printf(" - %s: health=%s, lastError=%s\n", + target.ScrapeURL, target.Health, target.LastError) + } + } + if !found { + fmt.Printf(" No targets found for job '%s'\n", job) + fmt.Printf(" Available jobs: ") + jobs := make(map[string]bool) + for _, target := range result.Data.ActiveTargets { + jobs[target.Labels["job"]] = true + } + for j := range jobs { + fmt.Printf("%s ", j) + } + fmt.Println() + } +} + +// HasMetrics checks if the specified job has any metrics available. +func (m *Manager) HasMetrics(ctx context.Context, job string) bool { + query := fmt.Sprintf(`up{job="%s"}`, job) + result, err := m.Query(ctx, query) + if err != nil { + return false + } + return len(result.Data.Result) > 0 && result.Data.Result[0].Value[1] == "1" +} + +// QueryResponse represents a Prometheus query response. +type QueryResponse struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric map[string]string `json:"metric"` + Value []interface{} `json:"value"` + } `json:"result"` + } `json:"data"` +} + +// Query executes a PromQL query and returns the response. 
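+// For example (illustrative; promMgr is a *Manager and the job label comes from
+// reloader.Manager.Job):
+//
+//	resp, err := promMgr.Query(ctx, `sum(reloader_action_total{job="reloader-new"})`)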
+func (m *Manager) Query(ctx context.Context, query string) (*QueryResponse, error) { + u := fmt.Sprintf("%s/api/v1/query?query=%s", m.URL(), url.QueryEscape(query)) + + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, err + } + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("querying prometheus: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading response: %w", err) + } + + var result QueryResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("parsing response: %w", err) + } + + return &result, nil +} + +// CollectMetrics collects all metrics for a scenario and writes to output directory. +func (m *Manager) CollectMetrics(ctx context.Context, job, outputDir, scenario string) error { + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("creating output directory: %w", err) + } + + timeRange := "10m" + + // For S6 (restart scenario), use increase() to handle counter resets + useIncrease := scenario == "S6" + + counterMetrics := []string{ + "reloader_reconcile_total", + "reloader_action_total", + "reloader_skipped_total", + "reloader_errors_total", + "reloader_events_received_total", + "reloader_workloads_scanned_total", + "reloader_workloads_matched_total", + "reloader_reload_executed_total", + } + + for _, metric := range counterMetrics { + var query string + if useIncrease { + query = fmt.Sprintf(`sum(increase(%s{job="%s"}[%s])) by (success, reason)`, metric, job, timeRange) + } else { + query = fmt.Sprintf(`sum(%s{job="%s"}) by (success, reason)`, metric, job) + } + + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, metric+".json")); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", metric, err) + } + } + + histogramMetrics := []struct { + name string + prefix string + }{ + {"reloader_reconcile_duration_seconds", "reconcile"}, + {"reloader_action_latency_seconds", "action"}, + } + + for _, hm := range histogramMetrics { + for _, pct := range []int{50, 95, 99} { + quantile := float64(pct) / 100 + query := fmt.Sprintf(`histogram_quantile(%v, sum(rate(%s_bucket{job="%s"}[%s])) by (le))`, + quantile, hm.name, job, timeRange) + outFile := filepath.Join(outputDir, fmt.Sprintf("%s_p%d.json", hm.prefix, pct)) + if err := m.queryAndSave(ctx, query, outFile); err != nil { + fmt.Printf("Warning: failed to collect %s p%d: %v\n", hm.name, pct, err) + } + } + } + + restQueries := map[string]string{ + "rest_client_requests_total.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s"})`, job), + "rest_client_requests_get.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="GET"})`, job), + "rest_client_requests_patch.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PATCH"})`, job), + "rest_client_requests_put.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PUT"})`, job), + "rest_client_requests_errors.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",code=~"[45].."}) or vector(0)`, job), + } + + for filename, query := range restQueries { + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", filename, err) + } + } + + resourceQueries := map[string]string{ + "memory_rss_bytes_avg.json": fmt.Sprintf(`avg_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, 
timeRange), + "memory_rss_bytes_max.json": fmt.Sprintf(`max_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange), + "memory_rss_bytes_cur.json": fmt.Sprintf(`process_resident_memory_bytes{job="%s"}`, job), + + "memory_heap_bytes_avg.json": fmt.Sprintf(`avg_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange), + "memory_heap_bytes_max.json": fmt.Sprintf(`max_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange), + + "cpu_usage_cores_avg.json": fmt.Sprintf(`rate(process_cpu_seconds_total{job="%s"}[%s])`, job, timeRange), + "cpu_usage_cores_max.json": fmt.Sprintf(`max_over_time(rate(process_cpu_seconds_total{job="%s"}[1m])[%s:1m])`, job, timeRange), + + "goroutines_avg.json": fmt.Sprintf(`avg_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange), + "goroutines_max.json": fmt.Sprintf(`max_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange), + "goroutines_cur.json": fmt.Sprintf(`go_goroutines{job="%s"}`, job), + + "gc_duration_seconds_p99.json": fmt.Sprintf(`histogram_quantile(0.99, sum(rate(go_gc_duration_seconds_bucket{job="%s"}[%s])) by (le))`, job, timeRange), + + "threads_cur.json": fmt.Sprintf(`go_threads{job="%s"}`, job), + } + + for filename, query := range resourceQueries { + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", filename, err) + } + } + + return nil +} + +func (m *Manager) queryAndSave(ctx context.Context, query, outputPath string) error { + result, err := m.Query(ctx, query) + if err != nil { + emptyResult := `{"status":"success","data":{"resultType":"vector","result":[]}}` + return os.WriteFile(outputPath, []byte(emptyResult), 0644) + } + + data, err := json.MarshalIndent(result, "", " ") + if err != nil { + return err + } + + return os.WriteFile(outputPath, data, 0644) +} diff --git a/test/loadtest/internal/reloader/reloader.go b/test/loadtest/internal/reloader/reloader.go new file mode 100644 index 0000000..2667cd4 --- /dev/null +++ b/test/loadtest/internal/reloader/reloader.go @@ -0,0 +1,271 @@ +package reloader + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// Config holds configuration for a Reloader deployment. +type Config struct { + Version string + Image string + Namespace string + ReloadStrategy string +} + +// Manager handles Reloader deployment operations. +type Manager struct { + config Config + kubeContext string +} + +// NewManager creates a new Reloader manager. +func NewManager(config Config) *Manager { + return &Manager{ + config: config, + } +} + +// SetKubeContext sets the kubeconfig context to use. +func (m *Manager) SetKubeContext(kubeContext string) { + m.kubeContext = kubeContext +} + +// kubectl returns kubectl command with optional context. +func (m *Manager) kubectl(ctx context.Context, args ...string) *exec.Cmd { + if m.kubeContext != "" { + args = append([]string{"--context", m.kubeContext}, args...) + } + return exec.CommandContext(ctx, "kubectl", args...) +} + +// namespace returns the namespace for this reloader instance. +func (m *Manager) namespace() string { + if m.config.Namespace != "" { + return m.config.Namespace + } + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// releaseName returns the release name for this instance. +func (m *Manager) releaseName() string { + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// Job returns the Prometheus job name for this Reloader instance. 
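+// The returned value ("reloader-old" or "reloader-new" in this harness) is the job
+// label that prometheus.Manager.WaitForTarget and CollectMetrics use to select this
+// instance's metrics.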
+func (m *Manager) Job() string { + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// Deploy deploys Reloader to the cluster using raw manifests. +func (m *Manager) Deploy(ctx context.Context) error { + ns := m.namespace() + name := m.releaseName() + + fmt.Printf("Deploying Reloader (%s) with image %s...\n", m.config.Version, m.config.Image) + + manifest := m.buildManifest(ns, name) + + applyCmd := m.kubectl(ctx, "apply", "-f", "-") + applyCmd.Stdin = strings.NewReader(manifest) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying manifest: %w", err) + } + + fmt.Printf("Waiting for Reloader deployment to be ready...\n") + waitCmd := m.kubectl(ctx, "rollout", "status", "deployment", name, + "-n", ns, + "--timeout=120s") + waitCmd.Stdout = os.Stdout + waitCmd.Stderr = os.Stderr + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for deployment: %w", err) + } + + time.Sleep(2 * time.Second) + + fmt.Printf("Reloader (%s) deployed successfully\n", m.config.Version) + return nil +} + +// buildManifest creates the raw Kubernetes manifest for Reloader. +func (m *Manager) buildManifest(ns, name string) string { + var args []string + args = append(args, "--log-format=json") + if m.config.ReloadStrategy != "" && m.config.ReloadStrategy != "default" { + args = append(args, fmt.Sprintf("--reload-strategy=%s", m.config.ReloadStrategy)) + } + + argsYAML := "" + if len(args) > 0 { + argsYAML = " args:\n" + for _, arg := range args { + argsYAML += fmt.Sprintf(" - %q\n", arg) + } + } + + return fmt.Sprintf(`--- +apiVersion: v1 +kind: Namespace +metadata: + name: %[1]s +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: %[2]s + namespace: %[1]s +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: %[2]s +rules: +- apiGroups: ["*"] + resources: ["*"] + verbs: ["*"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: %[2]s +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: %[2]s +subjects: +- kind: ServiceAccount + name: %[2]s + namespace: %[1]s +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: %[2]s + namespace: %[1]s + labels: + app: %[2]s + app.kubernetes.io/name: reloader + loadtest-version: %[3]s +spec: + replicas: 1 + selector: + matchLabels: + app: %[2]s + template: + metadata: + labels: + app: %[2]s + app.kubernetes.io/name: reloader + loadtest-version: %[3]s + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: %[2]s + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - name: reloader + image: %[4]s + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 9090 +%[5]s resources: + requests: + cpu: 10m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true +`, ns, name, m.config.Version, m.config.Image, argsYAML) +} + +// Cleanup removes all Reloader resources from the cluster. 
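+// The namespace is deleted without waiting, and the ClusterRole/ClusterRoleBinding are
+// removed explicitly since cluster-scoped objects are not cleaned up with the namespace.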
+func (m *Manager) Cleanup(ctx context.Context) error { + ns := m.namespace() + name := m.releaseName() + + delDeploy := m.kubectl(ctx, "delete", "deployment", name, "-n", ns, "--ignore-not-found") + delDeploy.Run() + + delCRB := m.kubectl(ctx, "delete", "clusterrolebinding", name, "--ignore-not-found") + delCRB.Run() + + delCR := m.kubectl(ctx, "delete", "clusterrole", name, "--ignore-not-found") + delCR.Run() + + delNS := m.kubectl(ctx, "delete", "namespace", ns, "--wait=false", "--ignore-not-found") + if err := delNS.Run(); err != nil { + return fmt.Errorf("deleting namespace: %w", err) + } + + return nil +} + +// CleanupByVersion removes Reloader resources for a specific version without needing a Manager instance. +// This is useful for cleaning up from previous runs before creating a new Manager. +func CleanupByVersion(ctx context.Context, version, kubeContext string) { + ns := fmt.Sprintf("reloader-%s", version) + name := fmt.Sprintf("reloader-%s", version) + + nsArgs := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"} + crArgs := []string{"delete", "clusterrole", name, "--ignore-not-found"} + crbArgs := []string{"delete", "clusterrolebinding", name, "--ignore-not-found"} + + if kubeContext != "" { + nsArgs = append([]string{"--context", kubeContext}, nsArgs...) + crArgs = append([]string{"--context", kubeContext}, crArgs...) + crbArgs = append([]string{"--context", kubeContext}, crbArgs...) + } + + exec.CommandContext(ctx, "kubectl", nsArgs...).Run() + exec.CommandContext(ctx, "kubectl", crArgs...).Run() + exec.CommandContext(ctx, "kubectl", crbArgs...).Run() +} + +// CollectLogs collects logs from the Reloader pod and writes them to the specified file. +func (m *Manager) CollectLogs(ctx context.Context, logPath string) error { + ns := m.namespace() + name := m.releaseName() + + if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil { + return fmt.Errorf("creating log directory: %w", err) + } + + cmd := m.kubectl(ctx, "logs", + "-n", ns, + "-l", fmt.Sprintf("app=%s", name), + "--tail=-1") + + out, err := cmd.Output() + if err != nil { + cmd = m.kubectl(ctx, "logs", + "-n", ns, + "-l", "app.kubernetes.io/name=reloader", + "--tail=-1") + out, err = cmd.Output() + if err != nil { + return fmt.Errorf("collecting logs: %w", err) + } + } + + if err := os.WriteFile(logPath, out, 0644); err != nil { + return fmt.Errorf("writing logs: %w", err) + } + + return nil +} diff --git a/test/loadtest/internal/scenarios/scenarios.go b/test/loadtest/internal/scenarios/scenarios.go new file mode 100644 index 0000000..4909feb --- /dev/null +++ b/test/loadtest/internal/scenarios/scenarios.go @@ -0,0 +1,2037 @@ +// Package scenarios contains all load test scenario implementations. +package scenarios + +import ( + "context" + "encoding/json" + "fmt" + "log" + "math/rand" + "os" + "path/filepath" + "sync" + "time" + + "github.com/stakater/Reloader/test/loadtest/internal/reloader" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/kubernetes" +) + +// ExpectedMetrics holds the expected values for metrics verification. 
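+// A zero value for any field means that metric has no expected value and is not
+// checked against one in the generated report.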
+type ExpectedMetrics struct { + ActionTotal int `json:"action_total"` + ReloadExecutedTotal int `json:"reload_executed_total"` + ReconcileTotal int `json:"reconcile_total"` + WorkloadsScannedTotal int `json:"workloads_scanned_total"` + WorkloadsMatchedTotal int `json:"workloads_matched_total"` + SkippedTotal int `json:"skipped_total"` + Description string `json:"description"` +} + +// Runner defines the interface for test scenarios. +type Runner interface { + Name() string + Description() string + Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) +} + +// Registry holds all available test scenarios. +var Registry = map[string]Runner{ + "S1": &BurstUpdateScenario{}, + "S2": &FanOutScenario{}, + "S3": &HighCardinalityScenario{}, + "S4": &NoOpUpdateScenario{}, + "S5": &WorkloadChurnScenario{}, + "S6": &ControllerRestartScenario{}, + "S7": &APIPressureScenario{}, + "S8": &LargeObjectScenario{}, + "S9": &MultiWorkloadTypeScenario{}, + "S10": &SecretsAndMixedScenario{}, + "S11": &AnnotationStrategyScenario{}, + "S12": &PauseResumeScenario{}, + "S13": &ComplexReferencesScenario{}, +} + +// WriteExpectedMetrics writes expected metrics to a JSON file. +func WriteExpectedMetrics(scenario, resultsDir string, expected ExpectedMetrics) error { + if resultsDir == "" { + return nil + } + + dir := filepath.Join(resultsDir, scenario) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating results directory: %w", err) + } + + data, err := json.MarshalIndent(expected, "", " ") + if err != nil { + return fmt.Errorf("marshaling expected metrics: %w", err) + } + + path := filepath.Join(dir, "expected.json") + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("writing expected metrics: %w", err) + } + + log.Printf("Expected metrics written to %s", path) + return nil +} + +// BurstUpdateScenario - Many ConfigMap/Secret updates in quick succession. 
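+// Ten ConfigMaps are paired one-to-one with ten auto-annotated Deployments; a random ConfigMap
+// is rewritten every 100ms until ~5s before the deadline, so every successful update should map
+// to exactly one reload. Like every entry in Registry, the scenario is meant to be driven by a
+// harness along these lines (a sketch, not the actual CLI wiring; namespace and results dir are
+// placeholders):
+//
+//	runner := Registry["S1"]
+//	expected, err := runner.Run(ctx, client, "loadtest-s1", 60*time.Second)
+//	if err == nil {
+//		_ = WriteExpectedMetrics(runner.Name(), "./results", expected)
+//	}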
+type BurstUpdateScenario struct{} + +func (s *BurstUpdateScenario) Name() string { return "S1" } +func (s *BurstUpdateScenario) Description() string { return "Burst ConfigMap/Secret updates" } + +func (s *BurstUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S1: Creating base ConfigMaps and Deployments...") + + const numConfigMaps = 10 + const numDeployments = 10 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("burst-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("burst-deploy-%d", i), namespace, fmt.Sprintf("burst-cm-%d", i)) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S1: Starting burst updates...") + + updateCount := 0 + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S1: Context cancelled, completed %d burst updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + WorkloadsMatchedTotal: updateCount, + Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("burst-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("value-%d-%d", updateCount, time.Now().UnixNano()) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S1: Completed %d burst updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + WorkloadsMatchedTotal: updateCount, + Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount), + }, nil +} + +// FanOutScenario - One ConfigMap used by many workloads. 
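+// A single ConfigMap ("shared-cm") is referenced by 50 auto-annotated Deployments and is updated
+// every 5s, so each successful update should fan out into 50 reload actions.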
+type FanOutScenario struct{} + +func (s *FanOutScenario) Name() string { return "S2" } +func (s *FanOutScenario) Description() string { return "Fan-out (one CM -> many workloads)" } + +func (s *FanOutScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S2: Creating shared ConfigMap and multiple Deployments...") + + const numDeployments = 50 + setupCtx := context.Background() + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared-cm", + Namespace: namespace, + }, + Data: map[string]string{"config": "initial"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err) + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("fanout-deploy-%d", i), namespace, "shared-cm") + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment %d: %v", i, err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S2: Updating shared ConfigMap...") + + if ctx.Err() != nil { + log.Printf("S2: WARNING - Context already done before update loop: %v", ctx.Err()) + } + if deadline, ok := ctx.Deadline(); ok { + remaining := time.Until(deadline) + log.Printf("S2: Context deadline in %v", remaining) + if remaining < 10*time.Second { + log.Printf("S2: WARNING - Very little time remaining on context!") + } + } else { + log.Println("S2: Context has no deadline") + } + + updateCount := 0 + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + log.Printf("S2: Will run updates for %v (duration=%v)", duration-5*time.Second, duration) + + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + expectedActions := updateCount * numDeployments + log.Printf("S2: Context done (err=%v), completed %d fan-out updates", ctx.Err(), updateCount) + return ExpectedMetrics{ + ActionTotal: expectedActions, + ReloadExecutedTotal: expectedActions, + WorkloadsScannedTotal: expectedActions, + WorkloadsMatchedTotal: expectedActions, + Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions), + }, nil + case <-ticker.C: + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "shared-cm", metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["config"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update shared ConfigMap: %v", err) + } else { + updateCount++ + log.Printf("S2: Updated shared ConfigMap (should trigger %d reloads)", numDeployments) + } + } + } + + expectedActions := updateCount * numDeployments + log.Printf("S2: Completed %d fan-out updates, expected %d total actions", updateCount, expectedActions) + return ExpectedMetrics{ + ActionTotal: expectedActions, + ReloadExecutedTotal: expectedActions, + WorkloadsScannedTotal: expectedActions, + WorkloadsMatchedTotal: expectedActions, + Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions), + }, nil +} + +// 
HighCardinalityScenario - Many ConfigMaps/Secrets across many namespaces. +type HighCardinalityScenario struct{} + +func (s *HighCardinalityScenario) Name() string { return "S3" } +func (s *HighCardinalityScenario) Description() string { + return "High cardinality (many CMs, many namespaces)" +} + +func (s *HighCardinalityScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S3: Creating high cardinality resources...") + + setupCtx := context.Background() + + namespaces := []string{namespace} + for i := 0; i < 10; i++ { + ns := fmt.Sprintf("%s-%d", namespace, i) + if _, err := client.CoreV1().Namespaces().Create(setupCtx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: ns}, + }, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create namespace %s: %v", ns, err) + } else { + namespaces = append(namespaces, ns) + } + } + + for _, ns := range namespaces { + for i := 0; i < 20; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("hc-cm-%d", i), + Namespace: ns, + }, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(ns).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("hc-deploy-%d", i), ns, fmt.Sprintf("hc-cm-%d", i)) + client.AppsV1().Deployments(ns).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + } + + if err := waitForAllNamespacesReady(setupCtx, client, namespaces, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S3: Starting random updates across namespaces...") + + updateDuration := duration - 5*time.Second + if updateDuration < 30*time.Second { + updateDuration = 30 * time.Second + } + + updateCount := 0 + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration) + defer updateCancel() + + endTime := time.Now().Add(updateDuration) + log.Printf("S3: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05")) + + for time.Now().Before(endTime) { + select { + case <-updateCtx.Done(): + log.Printf("S3: Completed %d high cardinality updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)), + }, nil + case <-ticker.C: + ns := namespaces[rand.Intn(len(namespaces))] + cmIndex := rand.Intn(20) + cm, err := client.CoreV1().ConfigMaps(ns).Get(setupCtx, fmt.Sprintf("hc-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(ns).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + + log.Printf("S3: Completed %d high cardinality updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)), + }, nil +} + +// NoOpUpdateScenario - Updates that don't actually change data. 
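+// Only ConfigMap annotations are touched while .data stays constant, so Reloader should treat
+// every event as a no-op: zero actions or executed reloads, with the skipped count expected to
+// equal the number of updates issued.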
+type NoOpUpdateScenario struct{} + +func (s *NoOpUpdateScenario) Name() string { return "S4" } +func (s *NoOpUpdateScenario) Description() string { return "No-op updates (same data)" } + +func (s *NoOpUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S4: Creating ConfigMaps and Deployments for no-op test...") + + setupCtx := context.Background() + + for i := 0; i < 10; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("noop-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "static-value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("noop-deploy-%d", i), namespace, fmt.Sprintf("noop-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S4: Starting no-op updates (annotation changes only)...") + + updateCount := 0 + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S4: Completed %d no-op updates", updateCount) + return ExpectedMetrics{ + ActionTotal: 0, + ReloadExecutedTotal: 0, + SkippedTotal: updateCount, + Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(10) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("noop-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + if cm.Annotations == nil { + cm.Annotations = make(map[string]string) + } + cm.Annotations["noop-counter"] = fmt.Sprintf("%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + + log.Printf("S4: Completed %d no-op updates (should see 0 actions)", updateCount) + return ExpectedMetrics{ + ActionTotal: 0, + ReloadExecutedTotal: 0, + SkippedTotal: updateCount, + Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount), + }, nil +} + +// WorkloadChurnScenario - Deployments created and deleted rapidly. 
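+// One goroutine creates a Deployment against "churn-cm" every 500ms and deletes the one created
+// ten iterations earlier (keeping roughly ten alive), while a second goroutine updates the
+// ConfigMap every 2s. Reload counts depend on which Deployments exist at update time, so only a
+// descriptive summary is recorded instead of exact expected metrics.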
+type WorkloadChurnScenario struct{} + +func (s *WorkloadChurnScenario) Name() string { return "S5" } +func (s *WorkloadChurnScenario) Description() string { return "Workload churn (rapid create/delete)" } + +func (s *WorkloadChurnScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S5: Creating base ConfigMap...") + + setupCtx := context.Background() + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "churn-cm", Namespace: namespace}, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + + log.Println("S5: Starting workload churn...") + + var wg sync.WaitGroup + var mu sync.Mutex + deployCounter := 0 + deleteCounter := 0 + cmUpdateCount := 0 + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return + case <-ticker.C: + deployName := fmt.Sprintf("churn-deploy-%d", deployCounter) + deploy := createDeployment(deployName, namespace, "churn-cm") + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err == nil { + mu.Lock() + deployCounter++ + mu.Unlock() + } + if deployCounter > 10 { + oldName := fmt.Sprintf("churn-deploy-%d", deployCounter-10) + if err := client.AppsV1().Deployments(namespace).Delete(setupCtx, oldName, metav1.DeleteOptions{}); err == nil { + mu.Lock() + deleteCounter++ + mu.Unlock() + } + } + } + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return + case <-ticker.C: + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "churn-cm", metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d", cmUpdateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + mu.Lock() + cmUpdateCount++ + mu.Unlock() + } + } + } + }() + + wg.Wait() + log.Printf("S5: Created %d, deleted %d deployments, %d CM updates", deployCounter, deleteCounter, cmUpdateCount) + + return ExpectedMetrics{ + Description: fmt.Sprintf("S5: Churn test - %d deploys created, %d deleted, %d CM updates, ~10 active deploys at any time", deployCounter, deleteCounter, cmUpdateCount), + }, nil +} + +// ControllerRestartScenario - Restart controller under load. 
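+// ConfigMap updates run continuously while, 20 seconds in, the Reloader pod in the
+// reloader-<ReloaderVersion> namespace (reloader-new when unset) is deleted, so the replacement
+// pod has to resume processing mid-load. Counters reset with the pod, so only a description is
+// recorded rather than exact expected metrics.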
+type ControllerRestartScenario struct {
+	ReloaderVersion string
+}
+
+func (s *ControllerRestartScenario) Name() string { return "S6" }
+func (s *ControllerRestartScenario) Description() string {
+	return "Controller restart under load"
+}
+
+func (s *ControllerRestartScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+	log.Println("S6: Creating resources and generating load...")
+
+	setupCtx := context.Background()
+
+	for i := 0; i < 20; i++ {
+		cm := &corev1.ConfigMap{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      fmt.Sprintf("restart-cm-%d", i),
+				Namespace: namespace,
+			},
+			Data: map[string]string{"key": "initial"},
+		}
+		client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{})
+		deploy := createDeployment(fmt.Sprintf("restart-deploy-%d", i), namespace, fmt.Sprintf("restart-cm-%d", i))
+		client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{})
+	}
+
+	if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+		log.Printf("Warning: %v - continuing anyway", err)
+	}
+
+	var wg sync.WaitGroup
+	var mu sync.Mutex
+	updateCount := 0
+
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		ticker := time.NewTicker(200 * time.Millisecond)
+		defer ticker.Stop()
+
+		endTime := time.Now().Add(duration - 5*time.Second)
+		for time.Now().Before(endTime) {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				cmIndex := rand.Intn(20)
+				cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("restart-cm-%d", cmIndex), metav1.GetOptions{})
+				if err != nil {
+					continue
+				}
+				cm.Data["key"] = fmt.Sprintf("update-%d", updateCount)
+				if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil {
+					mu.Lock()
+					updateCount++
+					mu.Unlock()
+				}
+			}
+		}
+	}()
+
+	reloaderNS := fmt.Sprintf("reloader-%s", s.ReloaderVersion)
+	if s.ReloaderVersion == "" {
+		reloaderNS = "reloader-new"
+	}
+
+	log.Println("S6: Waiting 20 seconds before restarting controller...")
+	time.Sleep(20 * time.Second)
+
+	log.Println("S6: Restarting Reloader pod...")
+	// The manager's manifest labels the pod app=<release name> (e.g. reloader-new) plus
+	// app.kubernetes.io/name=reloader, so select on the stable app.kubernetes.io/name label
+	// instead of a bare app=reloader, which would match nothing.
+	pods, err := client.CoreV1().Pods(reloaderNS).List(setupCtx, metav1.ListOptions{
+		LabelSelector: "app.kubernetes.io/name=reloader",
+	})
+	if err == nil && len(pods.Items) > 0 {
+		client.CoreV1().Pods(reloaderNS).Delete(setupCtx, pods.Items[0].Name, metav1.DeleteOptions{})
+	}
+
+	wg.Wait()
+	log.Printf("S6: Controller restart scenario completed with %d updates", updateCount)
+	return ExpectedMetrics{
+		Description: fmt.Sprintf("S6: Restart test - %d updates during restart", updateCount),
+	}, nil
+}
+
+// APIPressureScenario - Simulate API server pressure with many concurrent requests.
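+// Ten goroutines each update a random one of 50 ConfigMaps every 100ms, exercising Reloader and
+// the API server with overlapping writes; every successful update is still expected to produce
+// one reload.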
+type APIPressureScenario struct{} + +func (s *APIPressureScenario) Name() string { return "S7" } +func (s *APIPressureScenario) Description() string { return "API pressure (many concurrent requests)" } + +func (s *APIPressureScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S7: Creating resources for API pressure test...") + + const numConfigMaps = 50 + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("api-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("api-deploy-%d", i), namespace, fmt.Sprintf("api-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S7: Starting concurrent updates from multiple goroutines...") + + updateDuration := duration - 5*time.Second + if updateDuration < 30*time.Second { + updateDuration = 30 * time.Second + } + + updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration) + defer updateCancel() + + endTime := time.Now().Add(updateDuration) + log.Printf("S7: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05")) + + var wg sync.WaitGroup + var mu sync.Mutex + totalUpdates := 0 + + for g := 0; g < 10; g++ { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + updateCount := 0 + for time.Now().Before(endTime) { + select { + case <-updateCtx.Done(): + return + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("api-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("g%d-update-%d", goroutineID, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + mu.Lock() + totalUpdates += updateCount + mu.Unlock() + log.Printf("S7: Goroutine %d completed %d updates", goroutineID, updateCount) + }(g) + } + + wg.Wait() + log.Printf("S7: API pressure scenario completed with %d total updates", totalUpdates) + return ExpectedMetrics{ + ActionTotal: totalUpdates, + ReloadExecutedTotal: totalUpdates, + Description: fmt.Sprintf("S7: %d concurrent updates from 10 goroutines", totalUpdates), + }, nil +} + +// LargeObjectScenario - Large ConfigMaps/Secrets. 
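+// Each of the ten ConfigMaps carries two ~100KiB values (roughly 200KiB per object) and one of
+// them is rewritten every 2s, stressing watch and event handling with large payloads.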
+type LargeObjectScenario struct{} + +func (s *LargeObjectScenario) Name() string { return "S8" } +func (s *LargeObjectScenario) Description() string { return "Large ConfigMaps/Secrets (>100KB)" } + +func (s *LargeObjectScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S8: Creating large ConfigMaps...") + + setupCtx := context.Background() + + largeData := make([]byte, 100*1024) + for i := range largeData { + largeData[i] = byte('a' + (i % 26)) + } + largeValue := string(largeData) + + for i := 0; i < 10; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("large-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "large-key-1": largeValue, + "large-key-2": largeValue, + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create large ConfigMap %d: %v", i, err) + } + deploy := createDeployment(fmt.Sprintf("large-deploy-%d", i), namespace, fmt.Sprintf("large-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S8: Starting large object updates...") + + updateCount := 0 + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S8: Completed %d large object updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(10) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("large-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["large-key-1"] = largeValue[:len(largeValue)-10] + fmt.Sprintf("-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update large ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S8: Completed %d large object updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount), + }, nil +} + +func waitForDeploymentsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all deployments in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + deployments, err := client.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list deployments: %w", err) + } + + allReady := true + notReady := 0 + for _, d := range deployments.Items { + if d.Status.ReadyReplicas < *d.Spec.Replicas { + allReady = false + notReady++ + } + } + + if allReady && len(deployments.Items) > 0 { + log.Printf("All %d deployments in %s are ready", len(deployments.Items), namespace) + return nil + } + + log.Printf("Waiting for deployments: %d/%d not ready yet...", notReady, len(deployments.Items)) + time.Sleep(5 * 
time.Second) + } + + return fmt.Errorf("timeout waiting for deployments to be ready") +} + +func waitForAllNamespacesReady(ctx context.Context, client kubernetes.Interface, namespaces []string, timeout time.Duration) error { + log.Printf("Waiting for deployments in %d namespaces to be ready...", len(namespaces)) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + allReady := true + totalDeploys := 0 + notReady := 0 + + for _, ns := range namespaces { + deployments, err := client.AppsV1().Deployments(ns).List(ctx, metav1.ListOptions{}) + if err != nil { + continue + } + for _, d := range deployments.Items { + totalDeploys++ + if d.Status.ReadyReplicas < *d.Spec.Replicas { + allReady = false + notReady++ + } + } + } + + if allReady && totalDeploys > 0 { + log.Printf("All %d deployments across %d namespaces are ready", totalDeploys, len(namespaces)) + return nil + } + + log.Printf("Waiting: %d/%d deployments not ready yet...", notReady, totalDeploys) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for deployments to be ready") +} + +func createDeployment(name, namespace, configMapName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDeploymentWithSecret(name, namespace, secretName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: 
metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + SecretRef: &corev1.SecretEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDeploymentWithBoth(name, namespace, configMapName, secretName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + { + SecretRef: &corev1.SecretEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +// SecretsAndMixedScenario - Tests Secrets and mixed ConfigMap+Secret workloads. 
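+// Five Secrets and five ConfigMaps feed 5 secret-only, 3 configmap-only and 2 mixed Deployments;
+// updates alternate between a random Secret and a random ConfigMap every 500ms. Expected reloads
+// use the average fan-out per update: a Secret update touches (5+2)/5 = 1.4 workloads and a
+// ConfigMap update touches (3+2)/5 = 1.0, so e.g. 50 Secret updates plus 50 ConfigMap updates
+// would predict about 50*1.4 + 50*1.0 = 120 reloads.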
+type SecretsAndMixedScenario struct{} + +func (s *SecretsAndMixedScenario) Name() string { return "S10" } +func (s *SecretsAndMixedScenario) Description() string { + return "Secrets and mixed ConfigMap+Secret workloads" +} + +func (s *SecretsAndMixedScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S10: Creating Secrets, ConfigMaps, and mixed workloads...") + + const numSecrets = 5 + const numConfigMaps = 5 + const numSecretOnlyDeploys = 5 + const numConfigMapOnlyDeploys = 3 + const numMixedDeploys = 2 + + setupCtx := context.Background() + + for i := 0; i < numSecrets; i++ { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mixed-secret-%d", i), + Namespace: namespace, + }, + StringData: map[string]string{ + "password": fmt.Sprintf("initial-secret-%d", i), + }, + } + if _, err := client.CoreV1().Secrets(namespace).Create(setupCtx, secret, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Secret %s: %v", secret.Name, err) + } + } + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mixed-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "config": fmt.Sprintf("initial-config-%d", i), + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numSecretOnlyDeploys; i++ { + deploy := createDeploymentWithSecret( + fmt.Sprintf("secret-only-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-secret-%d", i%numSecrets), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Secret-only Deployment: %v", err) + } + } + + for i := 0; i < numConfigMapOnlyDeploys; i++ { + deploy := createDeployment( + fmt.Sprintf("cm-only-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-cm-%d", i%numConfigMaps), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap-only Deployment: %v", err) + } + } + + for i := 0; i < numMixedDeploys; i++ { + deploy := createDeploymentWithBoth( + fmt.Sprintf("mixed-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-cm-%d", i%numConfigMaps), + fmt.Sprintf("mixed-secret-%d", i%numSecrets), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create mixed Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S10: Starting alternating Secret and ConfigMap updates...") + + secretUpdateCount := 0 + cmUpdateCount := 0 + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + updateSecret := true + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecrets, numConfigMaps, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil + case <-ticker.C: + if updateSecret { + secretIndex := rand.Intn(numSecrets) + secret, err := client.CoreV1().Secrets(namespace).Get(setupCtx, fmt.Sprintf("mixed-secret-%d", 
secretIndex), metav1.GetOptions{}) + if err != nil { + continue + } + secret.StringData = map[string]string{ + "password": fmt.Sprintf("updated-secret-%d-%d", secretIndex, secretUpdateCount), + } + if _, err := client.CoreV1().Secrets(namespace).Update(setupCtx, secret, metav1.UpdateOptions{}); err == nil { + secretUpdateCount++ + } + } else { + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("mixed-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, cmUpdateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + cmUpdateCount++ + } + } + updateSecret = !updateSecret + } + } + + log.Printf("S10: Completed %d Secret updates and %d ConfigMap updates", secretUpdateCount, cmUpdateCount) + return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecrets, numConfigMaps, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil +} + +func (s *SecretsAndMixedScenario) calculateExpected(secretUpdates, cmUpdates, numSecrets, numConfigMaps, secretOnlyDeploys, cmOnlyDeploys, mixedDeploys int) ExpectedMetrics { + avgSecretReloads := float64(secretOnlyDeploys)/float64(numSecrets) + float64(mixedDeploys)/float64(numSecrets) + secretTriggeredReloads := int(float64(secretUpdates) * avgSecretReloads) + + avgCMReloads := float64(cmOnlyDeploys)/float64(numConfigMaps) + float64(mixedDeploys)/float64(numConfigMaps) + cmTriggeredReloads := int(float64(cmUpdates) * avgCMReloads) + + totalExpectedReloads := secretTriggeredReloads + cmTriggeredReloads + + return ExpectedMetrics{ + ActionTotal: totalExpectedReloads, + ReloadExecutedTotal: totalExpectedReloads, + Description: fmt.Sprintf("S10: %d Secret updates (→%d reloads, avg %.1f/update) + %d CM updates (→%d reloads, avg %.1f/update) = %d total", + secretUpdates, secretTriggeredReloads, avgSecretReloads, cmUpdates, cmTriggeredReloads, avgCMReloads, totalExpectedReloads), + } +} + +// MultiWorkloadTypeScenario - Tests all supported workload types with a shared ConfigMap. 
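+// In its current form the scenario creates 5 Deployments, 3 StatefulSets and 2 DaemonSets that
+// all consume "multi-type-cm", so each of the 5-second ConfigMap updates should trigger 10
+// reloads; Jobs and CronJobs are not exercised yet despite being listed in the Description.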
+type MultiWorkloadTypeScenario struct{} + +func (s *MultiWorkloadTypeScenario) Name() string { return "S9" } +func (s *MultiWorkloadTypeScenario) Description() string { + return "Multi-workload types (Deploy, StatefulSet, DaemonSet, Job, CronJob)" +} + +func (s *MultiWorkloadTypeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S9: Creating shared ConfigMap and multiple workload types...") + + const numDeployments = 5 + const numStatefulSets = 3 + const numDaemonSets = 2 + + setupCtx := context.Background() + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-type-cm", + Namespace: namespace, + }, + Data: map[string]string{"config": "initial"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err) + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("multi-deploy-%d", i), namespace, "multi-type-cm") + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment %d: %v", i, err) + } + } + + for i := 0; i < numStatefulSets; i++ { + sts := createStatefulSet(fmt.Sprintf("multi-sts-%d", i), namespace, "multi-type-cm") + if _, err := client.AppsV1().StatefulSets(namespace).Create(setupCtx, sts, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create StatefulSet %d: %v", i, err) + } + } + + for i := 0; i < numDaemonSets; i++ { + ds := createDaemonSet(fmt.Sprintf("multi-ds-%d", i), namespace, "multi-type-cm") + if _, err := client.AppsV1().DaemonSets(namespace).Create(setupCtx, ds, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create DaemonSet %d: %v", i, err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + if err := waitForStatefulSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + if err := waitForDaemonSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S9: Starting ConfigMap updates to trigger reloads on all workload types...") + + updateCount := 0 + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil + case <-ticker.C: + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "multi-type-cm", metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["config"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update shared ConfigMap: %v", err) + } else { + updateCount++ + log.Printf("S9: Updated shared ConfigMap (update #%d)", updateCount) + } + } + } + + log.Printf("S9: Completed %d ConfigMap updates", updateCount) + return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil +} + +func (s *MultiWorkloadTypeScenario) calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets int) 
ExpectedMetrics { + totalWorkloads := numDeployments + numStatefulSets + numDaemonSets + expectedReloads := updateCount * totalWorkloads + + return ExpectedMetrics{ + ActionTotal: expectedReloads, + ReloadExecutedTotal: expectedReloads, + WorkloadsMatchedTotal: expectedReloads, + Description: fmt.Sprintf("S9: %d CM updates × %d workloads (%d Deploys + %d STS + %d DS) = %d reloads", + updateCount, totalWorkloads, numDeployments, numStatefulSets, numDaemonSets, expectedReloads), + } +} + +func createStatefulSet(name, namespace, configMapName string) *appsv1.StatefulSet { + replicas := int32(1) + terminationGracePeriod := int64(0) + + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &replicas, + ServiceName: name, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDaemonSet(name, namespace, configMapName string) *appsv1.DaemonSet { + terminationGracePeriod := int64(0) + + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + { + Key: "node-role.kubernetes.io/master", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + }, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func waitForStatefulSetsReady(ctx context.Context, client 
kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all StatefulSets in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + stsList, err := client.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list StatefulSets: %w", err) + } + + if len(stsList.Items) == 0 { + log.Printf("No StatefulSets found in %s", namespace) + return nil + } + + allReady := true + notReady := 0 + for _, sts := range stsList.Items { + if sts.Status.ReadyReplicas < *sts.Spec.Replicas { + allReady = false + notReady++ + } + } + + if allReady { + log.Printf("All %d StatefulSets in %s are ready", len(stsList.Items), namespace) + return nil + } + + log.Printf("Waiting for StatefulSets: %d/%d not ready yet...", notReady, len(stsList.Items)) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for StatefulSets to be ready") +} + +func waitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all DaemonSets in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list DaemonSets: %w", err) + } + + if len(dsList.Items) == 0 { + log.Printf("No DaemonSets found in %s", namespace) + return nil + } + + allReady := true + notReady := 0 + for _, ds := range dsList.Items { + if ds.Status.NumberReady < ds.Status.DesiredNumberScheduled { + allReady = false + notReady++ + } + } + + if allReady { + log.Printf("All %d DaemonSets in %s are ready", len(dsList.Items), namespace) + return nil + } + + log.Printf("Waiting for DaemonSets: %d/%d not ready yet...", notReady, len(dsList.Items)) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for DaemonSets to be ready") +} + +// ComplexReferencesScenario - Tests init containers, valueFrom, and projected volumes. 
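+// Deployment i consumes complex-cm-i through an init-container envFrom, valueFrom env vars on
+// the main container, and volume mounts on the sidecar, and also references the next ConfigMap
+// in the ring (complex-cm-((i+1) mod 5)), so each ConfigMap update should reach roughly two
+// Deployments.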
+type ComplexReferencesScenario struct{} + +func (s *ComplexReferencesScenario) Name() string { return "S13" } +func (s *ComplexReferencesScenario) Description() string { + return "Complex references (init containers, valueFrom, projected volumes)" +} + +func (s *ComplexReferencesScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S13: Creating ConfigMaps and complex deployments with various reference types...") + + const numConfigMaps = 5 + const numDeployments = 5 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("complex-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "key1": fmt.Sprintf("value1-%d", i), + "key2": fmt.Sprintf("value2-%d", i), + "config": fmt.Sprintf("config-%d", i), + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + primaryCM := fmt.Sprintf("complex-cm-%d", i) + secondaryCM := fmt.Sprintf("complex-cm-%d", (i+1)%numConfigMaps) + + deploy := createComplexDeployment( + fmt.Sprintf("complex-deploy-%d", i), + namespace, + primaryCM, + secondaryCM, + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create complex Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S13: Starting ConfigMap updates to test all reference types...") + + updateCount := 0 + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("complex-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key1"] = fmt.Sprintf("updated-value1-%d-%d", cmIndex, updateCount) + cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + log.Printf("S13: Updated complex-cm-%d (update #%d)", cmIndex, updateCount) + } + } + } + + log.Printf("S13: Completed %d ConfigMap updates", updateCount) + return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil +} + +func (s *ComplexReferencesScenario) calculateExpected(updateCount, numConfigMaps, numDeployments int) ExpectedMetrics { + expectedReloadsPerUpdate := 2 + expectedReloads := updateCount * expectedReloadsPerUpdate + + return ExpectedMetrics{ + ActionTotal: expectedReloads, + ReloadExecutedTotal: expectedReloads, + Description: fmt.Sprintf("S13: %d CM updates × ~%d affected deploys = ~%d reloads (init containers, valueFrom, volumes, projected)", + updateCount, expectedReloadsPerUpdate, expectedReloads), + } +} + +// PauseResumeScenario - Tests pause-period functionality under rapid updates. 
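+// ConfigMaps are rewritten every 2s against Deployments annotated with a 15s
+// deployment.reloader.stakater.com/pause-period, so the number of rollouts Reloader actually
+// performs should be far smaller than the number of updates issued; only a descriptive summary
+// is recorded.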
+type PauseResumeScenario struct{} + +func (s *PauseResumeScenario) Name() string { return "S12" } +func (s *PauseResumeScenario) Description() string { + return "Pause & Resume (rapid updates with pause-period)" +} + +func (s *PauseResumeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S12: Creating ConfigMaps and Deployments with pause-period annotation...") + + const numConfigMaps = 10 + const numDeployments = 10 + const pausePeriod = 15 * time.Second + const updateInterval = 2 * time.Second + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pause-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeploymentWithPause( + fmt.Sprintf("pause-deploy-%d", i), + namespace, + fmt.Sprintf("pause-cm-%d", i), + pausePeriod, + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Printf("S12: Starting rapid ConfigMap updates (every %v) with %v pause-period...", updateInterval, pausePeriod) + + updateCount := 0 + ticker := time.NewTicker(updateInterval) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("pause-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S12: Completed %d rapid updates (pause-period should reduce actual reloads)", updateCount) + return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil +} + +func (s *PauseResumeScenario) calculateExpected(updateCount int, duration, updateInterval, pausePeriod time.Duration) ExpectedMetrics { + + // This is an approximation - the actual value depends on random distribution + expectedCycles := int(duration / pausePeriod) + if expectedCycles < 1 { + expectedCycles = 1 + } + + return ExpectedMetrics{ + Description: fmt.Sprintf("S12: %d updates with %v pause-period (expect ~%d reload cycles, actual reloads << updates)", + updateCount, pausePeriod, expectedCycles), + } +} + +// AnnotationStrategyScenario - Tests annotation-based reload strategy. +// This scenario deploys its own Reloader instance with --reload-strategy=annotations. 
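+// The scenario requires Image to be set to the image under test; it brings up a second Reloader
+// in the reloader-s11 namespace with --reload-strategy=annotations and then checks for the
+// reloader.stakater.com/last-reloaded-from pod-template annotation instead of relying on the
+// env-var strategy. A caller would construct it roughly like this (sketch; newImage, namespace
+// and duration are whatever the harness already holds):
+//
+//	s11 := &AnnotationStrategyScenario{Image: newImage}
+//	expected, err := s11.Run(ctx, client, namespace, duration)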
+type AnnotationStrategyScenario struct { + Image string +} + +func (s *AnnotationStrategyScenario) Name() string { return "S11" } +func (s *AnnotationStrategyScenario) Description() string { + return "Annotation reload strategy (--reload-strategy=annotations)" +} + +func (s *AnnotationStrategyScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + if s.Image == "" { + return ExpectedMetrics{}, fmt.Errorf("S11 requires Image to be set (use the same image as --new-image)") + } + + log.Println("S11: Deploying Reloader with --reload-strategy=annotations...") + + reloaderNS := "reloader-s11" + mgr := reloader.NewManager(reloader.Config{ + Version: "s11", + Image: s.Image, + Namespace: reloaderNS, + ReloadStrategy: "annotations", + }) + + if err := mgr.Deploy(ctx); err != nil { + return ExpectedMetrics{}, fmt.Errorf("deploying S11 reloader: %w", err) + } + + defer func() { + log.Println("S11: Cleaning up S11-specific Reloader...") + cleanupCtx := context.Background() + if err := mgr.Cleanup(cleanupCtx); err != nil { + log.Printf("Warning: failed to cleanup S11 reloader: %v", err) + } + }() + + log.Println("S11: Creating ConfigMaps and Deployments...") + + const numConfigMaps = 10 + const numDeployments = 10 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("annot-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("annot-deploy-%d", i), namespace, fmt.Sprintf("annot-cm-%d", i)) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S11: Starting ConfigMap updates with annotation strategy...") + + updateCount := 0 + annotationUpdatesSeen := 0 + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 10*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, annotationUpdatesSeen), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("annot-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + + if updateCount%10 == 0 { + deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", cmIndex), metav1.GetOptions{}) + if err == nil { + if _, hasAnnotation := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; hasAnnotation { + annotationUpdatesSeen++ + } + } + } + } + } + + log.Println("S11: Verifying annotation-based reload...") + time.Sleep(5 * time.Second) + + 
+	deploysWithAnnotation := 0
+	for i := 0; i < numDeployments; i++ {
+		deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", i), metav1.GetOptions{})
+		if err != nil {
+			continue
+		}
+		if deploy.Spec.Template.Annotations != nil {
+			if _, ok := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; ok {
+				deploysWithAnnotation++
+			}
+		}
+	}
+
+	log.Printf("S11: Completed %d updates, %d deployments have reload annotation", updateCount, deploysWithAnnotation)
+	return s.calculateExpected(updateCount, deploysWithAnnotation), nil
+}
+
+func (s *AnnotationStrategyScenario) calculateExpected(updateCount, deploysWithAnnotation int) ExpectedMetrics {
+	return ExpectedMetrics{
+		ActionTotal:         updateCount,
+		ReloadExecutedTotal: updateCount,
+		Description: fmt.Sprintf("S11: %d updates with annotation strategy, %d deployments received annotation",
+			updateCount, deploysWithAnnotation),
+	}
+}
+
+func createDeploymentWithPause(name, namespace, configMapName string, pausePeriod time.Duration) *appsv1.Deployment {
+	replicas := int32(1)
+	maxSurge := intstr.FromInt(1)
+	maxUnavailable := intstr.FromInt(1)
+	terminationGracePeriod := int64(0)
+
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			Annotations: map[string]string{
+				"reloader.stakater.com/auto": "true",
+				// Deployment-specific pause-period annotation
+				"deployment.reloader.stakater.com/pause-period": fmt.Sprintf("%ds", int(pausePeriod.Seconds())),
+			},
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &replicas,
+			Strategy: appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxSurge:       &maxSurge,
+					MaxUnavailable: &maxUnavailable,
+				},
+			},
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": name},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{"app": name},
+				},
+				Spec: corev1.PodSpec{
+					TerminationGracePeriodSeconds: &terminationGracePeriod,
+					Containers: []corev1.Container{
+						{
+							Name:    "app",
+							Image:   "gcr.io/google-containers/busybox:1.27",
+							Command: []string{"sh", "-c", "sleep 999999999"},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("1m"),
+									corev1.ResourceMemory: resource.MustParse("4Mi"),
+								},
+								Limits: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("10m"),
+									corev1.ResourceMemory: resource.MustParse("16Mi"),
+								},
+							},
+							EnvFrom: []corev1.EnvFromSource{
+								{
+									ConfigMapRef: &corev1.ConfigMapEnvSource{
+										LocalObjectReference: corev1.LocalObjectReference{
+											Name: configMapName,
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+// createComplexDeployment creates a deployment with multiple ConfigMap reference types.
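+// The deployment carries the reloader.stakater.com/auto annotation and references
+// its ConfigMaps in several distinct ways: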
+// - Init container using envFrom
+// - Main container using env.valueFrom.configMapKeyRef
+// - Sidecar container using volume mount
+// - Projected volume combining multiple ConfigMaps
+func createComplexDeployment(name, namespace, primaryCM, secondaryCM string) *appsv1.Deployment {
+	replicas := int32(1)
+	maxSurge := intstr.FromInt(1)
+	maxUnavailable := intstr.FromInt(1)
+	terminationGracePeriod := int64(0)
+
+	return &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			Annotations: map[string]string{
+				"reloader.stakater.com/auto": "true",
+			},
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &replicas,
+			Strategy: appsv1.DeploymentStrategy{
+				Type: appsv1.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &appsv1.RollingUpdateDeployment{
+					MaxSurge:       &maxSurge,
+					MaxUnavailable: &maxUnavailable,
+				},
+			},
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{"app": name},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{"app": name},
+				},
+				Spec: corev1.PodSpec{
+					TerminationGracePeriodSeconds: &terminationGracePeriod,
+					InitContainers: []corev1.Container{
+						{
+							Name:    "init",
+							Image:   "gcr.io/google-containers/busybox:1.27",
+							Command: []string{"sh", "-c", "echo Init done"},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("1m"),
+									corev1.ResourceMemory: resource.MustParse("4Mi"),
+								},
+								Limits: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("10m"),
+									corev1.ResourceMemory: resource.MustParse("16Mi"),
+								},
+							},
+							EnvFrom: []corev1.EnvFromSource{
+								{
+									ConfigMapRef: &corev1.ConfigMapEnvSource{
+										LocalObjectReference: corev1.LocalObjectReference{
+											Name: primaryCM,
+										},
+									},
+								},
+							},
+						},
+					},
+					Containers: []corev1.Container{
+						{
+							Name:    "main",
+							Image:   "gcr.io/google-containers/busybox:1.27",
+							Command: []string{"sh", "-c", "sleep 999999999"},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("1m"),
+									corev1.ResourceMemory: resource.MustParse("4Mi"),
+								},
+								Limits: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("10m"),
+									corev1.ResourceMemory: resource.MustParse("16Mi"),
+								},
+							},
+							Env: []corev1.EnvVar{
+								{
+									Name: "CONFIG_KEY1",
+									ValueFrom: &corev1.EnvVarSource{
+										ConfigMapKeyRef: &corev1.ConfigMapKeySelector{
+											LocalObjectReference: corev1.LocalObjectReference{
+												Name: primaryCM,
+											},
+											Key: "key1",
+										},
+									},
+								},
+								{
+									Name: "CONFIG_KEY2",
+									ValueFrom: &corev1.EnvVarSource{
+										ConfigMapKeyRef: &corev1.ConfigMapKeySelector{
+											LocalObjectReference: corev1.LocalObjectReference{
+												Name: primaryCM,
+											},
+											Key: "key2",
+										},
+									},
+								},
+							},
+						},
+						{
+							Name:    "sidecar",
+							Image:   "gcr.io/google-containers/busybox:1.27",
+							Command: []string{"sh", "-c", "sleep 999999999"},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("1m"),
+									corev1.ResourceMemory: resource.MustParse("4Mi"),
+								},
+								Limits: corev1.ResourceList{
+									corev1.ResourceCPU:    resource.MustParse("10m"),
+									corev1.ResourceMemory: resource.MustParse("16Mi"),
+								},
+							},
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "config-volume",
+									MountPath: "/etc/config",
+								},
+								{
+									Name:      "projected-volume",
+									MountPath: "/etc/projected",
+								},
+							},
+						},
+					},
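+					// Two volume-based references: a plain ConfigMap volume plus a projected
+					// volume that pulls selected keys from both ConfigMaps.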
+					Volumes: []corev1.Volume{
+						{
+							Name: "config-volume",
+							VolumeSource: corev1.VolumeSource{
+								ConfigMap: &corev1.ConfigMapVolumeSource{
+									LocalObjectReference: corev1.LocalObjectReference{
+										Name: primaryCM,
+									},
+								},
+							},
+						},
+						{
+							Name: "projected-volume",
+							VolumeSource: corev1.VolumeSource{
+								Projected: &corev1.ProjectedVolumeSource{
+									Sources: []corev1.VolumeProjection{
+										{
+											ConfigMap: &corev1.ConfigMapProjection{
+												LocalObjectReference: corev1.LocalObjectReference{
+													Name: primaryCM,
+												},
+												Items: []corev1.KeyToPath{
+													{
+														Key:  "key1",
+														Path: "primary-key1",
+													},
+												},
+											},
+										},
+										{
+											ConfigMap: &corev1.ConfigMapProjection{
+												LocalObjectReference: corev1.LocalObjectReference{
+													Name: secondaryCM,
+												},
+												Items: []corev1.KeyToPath{
+													{
+														Key:  "key1",
+														Path: "secondary-key1",
+													},
+												},
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+}
diff --git a/test/loadtest/manifests/prometheus.yaml b/test/loadtest/manifests/prometheus.yaml
new file mode 100644
index 0000000..f826f52
--- /dev/null
+++ b/test/loadtest/manifests/prometheus.yaml
@@ -0,0 +1,181 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-config
+  namespace: monitoring
+data:
+  prometheus.yml: |
+    global:
+      scrape_interval: 2s
+      evaluation_interval: 2s
+
+    scrape_configs:
+      - job_name: 'prometheus'
+        static_configs:
+          - targets: ['localhost:9090']
+
+      - job_name: 'reloader-old'
+        kubernetes_sd_configs:
+          - role: pod
+            namespaces:
+              names:
+                - reloader-old
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
+
+      - job_name: 'reloader-new'
+        kubernetes_sd_configs:
+          - role: pod
+            namespaces:
+              names:
+                - reloader-new
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources:
+      - configmaps
+    verbs: ["get"]
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+  - kind: ServiceAccount
+    name: prometheus
+    namespace: monitoring
+---
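+# Prometheus itself: a single replica backed by an emptyDir data volume. Metrics
+# only need to live for the duration of a load-test run, so no persistent storage
+# is configured.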
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      serviceAccountName: prometheus
+      containers:
+        - name: prometheus
+          image: quay.io/prometheus/prometheus:v2.47.0
+          args:
+            - --config.file=/etc/prometheus/prometheus.yml
+            - --storage.tsdb.path=/prometheus
+            - --web.console.libraries=/usr/share/prometheus/console_libraries
+            - --web.console.templates=/usr/share/prometheus/consoles
+            - --web.enable-lifecycle
+          ports:
+            - containerPort: 9090
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus
+            - name: data
+              mountPath: /prometheus
+          resources:
+            limits:
+              cpu: 1000m
+              memory: 1Gi
+            requests:
+              cpu: 200m
+              memory: 512Mi
+          readinessProbe:
+            httpGet:
+              path: /-/ready
+              port: 9090
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /-/healthy
+              port: 9090
+            initialDelaySeconds: 10
+            periodSeconds: 10
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus-config
+        - name: data
+          emptyDir: {}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  selector:
+    app: prometheus
+  ports:
+    - port: 9090
+      targetPort: 9090
+  type: NodePort
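+# NodePort exposure lets tooling outside the cluster (for example the loadtest
+# harness) query Prometheus; in-cluster consumers can use the Service DNS name.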