diff --git a/.github/actions/loadtest/action.yml b/.github/actions/loadtest/action.yml new file mode 100644 index 0000000..3f71ae9 --- /dev/null +++ b/.github/actions/loadtest/action.yml @@ -0,0 +1,267 @@ +name: 'Reloader Load Test' +description: 'Run Reloader load tests with A/B comparison support' + +inputs: + old-ref: + description: 'Git ref for "old" version (optional, enables A/B comparison)' + required: false + default: '' + new-ref: + description: 'Git ref for "new" version (defaults to current checkout)' + required: false + default: '' + old-image: + description: 'Pre-built container image for "old" version (alternative to old-ref)' + required: false + default: '' + new-image: + description: 'Pre-built container image for "new" version (alternative to new-ref)' + required: false + default: '' + scenarios: + description: 'Scenarios to run: S1,S4,S6 or all' + required: false + default: 'S1,S4,S6' + test-type: + description: 'Test type label for summary: quick or full' + required: false + default: 'quick' + duration: + description: 'Test duration in seconds' + required: false + default: '60' + kind-cluster: + description: 'Name of existing Kind cluster (if empty, creates new one)' + required: false + default: '' + post-comment: + description: 'Post results as PR comment' + required: false + default: 'false' + pr-number: + description: 'PR number for commenting (required if post-comment is true)' + required: false + default: '' + github-token: + description: 'GitHub token for posting comments' + required: false + default: ${{ github.token }} + comment-header: + description: 'Optional header text for the comment' + required: false + default: '' + +outputs: + status: + description: 'Overall test status: pass or fail' + value: ${{ steps.run.outputs.status }} + summary: + description: 'Markdown summary of results' + value: ${{ steps.summary.outputs.summary }} + pass-count: + description: 'Number of passed scenarios' + value: ${{ steps.summary.outputs.pass_count }} + fail-count: + description: 'Number of failed scenarios' + value: ${{ steps.summary.outputs.fail_count }} + +runs: + using: 'composite' + steps: + - name: Determine images to use + id: images + shell: bash + run: | + # Determine old image + if [ -n "${{ inputs.old-image }}" ]; then + echo "old=${{ inputs.old-image }}" >> $GITHUB_OUTPUT + elif [ -n "${{ inputs.old-ref }}" ]; then + echo "old=localhost/reloader:old" >> $GITHUB_OUTPUT + echo "build_old=true" >> $GITHUB_OUTPUT + else + echo "old=" >> $GITHUB_OUTPUT + fi + + # Determine new image + if [ -n "${{ inputs.new-image }}" ]; then + echo "new=${{ inputs.new-image }}" >> $GITHUB_OUTPUT + elif [ -n "${{ inputs.new-ref }}" ]; then + echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT + echo "build_new=true" >> $GITHUB_OUTPUT + else + # Default: build from current checkout + echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT + echo "build_new_current=true" >> $GITHUB_OUTPUT + fi + + - name: Build old image from ref + if: steps.images.outputs.build_old == 'true' + shell: bash + run: | + CURRENT_SHA=$(git rev-parse HEAD) + git checkout ${{ inputs.old-ref }} + docker build -t localhost/reloader:old . + echo "Built old image from ref: ${{ inputs.old-ref }}" + git checkout $CURRENT_SHA + + - name: Build new image from ref + if: steps.images.outputs.build_new == 'true' + shell: bash + run: | + CURRENT_SHA=$(git rev-parse HEAD) + git checkout ${{ inputs.new-ref }} + docker build -t localhost/reloader:new . 
+ echo "Built new image from ref: ${{ inputs.new-ref }}" + git checkout $CURRENT_SHA + + - name: Build new image from current checkout + if: steps.images.outputs.build_new_current == 'true' + shell: bash + run: | + docker build -t localhost/reloader:new . + echo "Built new image from current checkout" + + - name: Build loadtest binary + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + go build -o loadtest ./cmd/loadtest + + - name: Determine cluster name + id: cluster + shell: bash + run: | + if [ -n "${{ inputs.kind-cluster }}" ]; then + echo "name=${{ inputs.kind-cluster }}" >> $GITHUB_OUTPUT + echo "skip=true" >> $GITHUB_OUTPUT + else + echo "name=reloader-loadtest" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + fi + + - name: Load images into Kind + shell: bash + run: | + CLUSTER="${{ steps.cluster.outputs.name }}" + + if [ -n "${{ steps.images.outputs.old }}" ]; then + echo "Loading old image: ${{ steps.images.outputs.old }}" + kind load docker-image "${{ steps.images.outputs.old }}" --name "$CLUSTER" || true + fi + + echo "Loading new image: ${{ steps.images.outputs.new }}" + kind load docker-image "${{ steps.images.outputs.new }}" --name "$CLUSTER" || true + + - name: Run load tests + id: run + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + + ARGS="--new-image=${{ steps.images.outputs.new }}" + ARGS="$ARGS --scenario=${{ inputs.scenarios }}" + ARGS="$ARGS --duration=${{ inputs.duration }}" + ARGS="$ARGS --cluster-name=${{ steps.cluster.outputs.name }}" + ARGS="$ARGS --skip-image-load" + + if [ -n "${{ steps.images.outputs.old }}" ]; then + ARGS="$ARGS --old-image=${{ steps.images.outputs.old }}" + fi + + if [ "${{ steps.cluster.outputs.skip }}" = "true" ]; then + ARGS="$ARGS --skip-cluster" + fi + + echo "Running: ./loadtest run $ARGS" + if ./loadtest run $ARGS; then + echo "status=pass" >> $GITHUB_OUTPUT + else + echo "status=fail" >> $GITHUB_OUTPUT + fi + + - name: Generate summary + id: summary + shell: bash + run: | + cd ${{ github.workspace }}/test/loadtest + + # Generate markdown summary + ./loadtest summary \ + --results-dir=./results \ + --test-type=${{ inputs.test-type }} \ + --format=markdown > summary.md 2>/dev/null || true + + # Output to GitHub Step Summary + cat summary.md >> $GITHUB_STEP_SUMMARY + + # Store summary for output (using heredoc for multiline) + { + echo 'summary<> $GITHUB_OUTPUT + + # Get pass/fail counts from JSON + COUNTS=$(./loadtest summary --format=json 2>/dev/null | head -20 || echo '{}') + echo "pass_count=$(echo "$COUNTS" | grep -o '"pass_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT + echo "fail_count=$(echo "$COUNTS" | grep -o '"fail_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT + + - name: Post PR comment + if: inputs.post-comment == 'true' && inputs.pr-number != '' + continue-on-error: true + uses: actions/github-script@v7 + with: + github-token: ${{ inputs.github-token }} + script: | + const fs = require('fs'); + const summaryPath = '${{ github.workspace }}/test/loadtest/summary.md'; + let summary = 'No results available'; + try { + summary = fs.readFileSync(summaryPath, 'utf8'); + } catch (e) { + console.log('Could not read summary file:', e.message); + } + + const header = '${{ inputs.comment-header }}'; + const status = '${{ steps.run.outputs.status }}'; + const statusEmoji = status === 'pass' ? ':white_check_mark:' : ':x:'; + + const body = [ + header ? 
header : `## ${statusEmoji} Load Test Results (${{ inputs.test-type }})`, + '', + summary, + '', + '---', + `**Artifacts:** [Download](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})`, + ].join('\n'); + + try { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: ${{ inputs.pr-number }}, + body: body + }); + console.log('Comment posted successfully'); + } catch (error) { + if (error.status === 403) { + console.log('Could not post comment (fork PR with restricted permissions). Use /loadtest command to run with comment posting.'); + } else { + throw error; + } + } + + - name: Upload results + uses: actions/upload-artifact@v4 + if: always() + with: + name: loadtest-${{ inputs.test-type }}-results + path: | + ${{ github.workspace }}/test/loadtest/results/ + retention-days: 30 + + - name: Cleanup Kind cluster (only if we created it) + if: always() && steps.cluster.outputs.skip == 'false' + shell: bash + run: | + kind delete cluster --name ${{ steps.cluster.outputs.name }} || true diff --git a/.github/workflows/loadtest.yml b/.github/workflows/loadtest.yml new file mode 100644 index 0000000..c997e13 --- /dev/null +++ b/.github/workflows/loadtest.yml @@ -0,0 +1,112 @@ +name: Load Test (Full) + +on: + issue_comment: + types: [created] + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + loadtest: + # Only run on PR comments with /loadtest command + if: | + github.event.issue.pull_request && + contains(github.event.comment.body, '/loadtest') + runs-on: ubuntu-latest + + steps: + - name: Add reaction to comment + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: 'rocket' + }); + + - name: Get PR details + id: pr + uses: actions/github-script@v7 + with: + script: | + const pr = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number + }); + core.setOutput('head_ref', pr.data.head.ref); + core.setOutput('head_sha', pr.data.head.sha); + core.setOutput('base_ref', pr.data.base.ref); + core.setOutput('base_sha', pr.data.base.sha); + console.log(`PR #${context.issue.number}: ${pr.data.head.ref} -> ${pr.data.base.ref}`); + + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + ref: ${{ steps.pr.outputs.head_sha }} + fetch-depth: 0 # Full history for building from base ref + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: false + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Install kind + run: | + curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64 + chmod +x ./kind + sudo mv ./kind /usr/local/bin/kind + + - name: Install kubectl + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + sudo mv kubectl /usr/local/bin/kubectl + + - name: Run full A/B comparison load test + id: loadtest + uses: ./.github/actions/loadtest + with: + old-ref: ${{ steps.pr.outputs.base_sha }} + new-ref: ${{ steps.pr.outputs.head_sha }} + scenarios: 'all' + test-type: 'full' + post-comment: 'true' + pr-number: ${{ github.event.issue.number }} + comment-header: | + ## Load Test Results (Full A/B Comparison) + **Comparing:** `${{ steps.pr.outputs.base_ref }}` → `${{ steps.pr.outputs.head_ref }}` + 
**Triggered by:** @${{ github.event.comment.user.login }} + + - name: Add success reaction + if: steps.loadtest.outputs.status == 'pass' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: '+1' + }); + + - name: Add failure reaction + if: steps.loadtest.outputs.status == 'fail' + uses: actions/github-script@v7 + with: + script: | + await github.rest.reactions.createForIssueComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: context.payload.comment.id, + content: '-1' + }); diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index e4b1c6f..c428826 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -35,6 +35,8 @@ jobs: permissions: contents: read + pull-requests: write + issues: write runs-on: ubuntu-latest name: Build @@ -109,6 +111,17 @@ jobs: - name: Test run: make test + - name: Run quick A/B load tests + uses: ./.github/actions/loadtest + with: + old-ref: ${{ github.event.pull_request.base.sha }} + # new-ref defaults to current checkout (PR branch) + scenarios: 'S1,S4,S6' + test-type: 'quick' + kind-cluster: 'kind' # Use the existing cluster created above + post-comment: 'true' + pr-number: ${{ github.event.pull_request.number }} + - name: Generate Tags id: generate_tag run: | diff --git a/.gitignore b/.gitignore index 73da63e..3f28c3f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,9 +11,14 @@ vendor dist Reloader !**/chart/reloader +!**/internal/reloader *.tgz styles/ site/ /mkdocs.yml yq -bin \ No newline at end of file +bin +test/loadtest/results +test/loadtest/loadtest +# Temporary NFS files +.nfs* diff --git a/Makefile b/Makefile index 3c15d05..99d107e 100644 --- a/Makefile +++ b/Makefile @@ -228,3 +228,43 @@ yq-install: @curl -sL $(YQ_DOWNLOAD_URL) -o $(YQ_BIN) @chmod +x $(YQ_BIN) @echo "yq $(YQ_VERSION) installed at $(YQ_BIN)" + +# ============================================================================= +# Load Testing +# ============================================================================= + +LOADTEST_BIN = test/loadtest/loadtest +LOADTEST_OLD_IMAGE ?= localhost/reloader:old +LOADTEST_NEW_IMAGE ?= localhost/reloader:new +LOADTEST_DURATION ?= 60 +LOADTEST_SCENARIOS ?= all + +.PHONY: loadtest-build loadtest-quick loadtest-full loadtest loadtest-clean + +loadtest-build: ## Build loadtest binary + cd test/loadtest && $(GOCMD) build -o loadtest ./cmd/loadtest + +loadtest-quick: loadtest-build ## Run quick load tests (S1, S4, S6) + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=S1,S4,S6 \ + --duration=$(LOADTEST_DURATION) + +loadtest-full: loadtest-build ## Run full load test suite + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=all \ + --duration=$(LOADTEST_DURATION) + +loadtest: loadtest-build ## Run load tests with configurable scenarios (default: all) + cd test/loadtest && ./loadtest run \ + --old-image=$(LOADTEST_OLD_IMAGE) \ + --new-image=$(LOADTEST_NEW_IMAGE) \ + --scenario=$(LOADTEST_SCENARIOS) \ + --duration=$(LOADTEST_DURATION) + +loadtest-clean: ## Clean loadtest binary and results + rm -f $(LOADTEST_BIN) + rm -rf test/loadtest/results diff --git a/README.md b/README.md index ae0a00a..57a1e62 100644 --- a/README.md +++ b/README.md @@ -13,7 
+13,7 @@ ## 🔁 What is Reloader? -Reloader is a Kubernetes controller that automatically triggers rollouts of workloads (like Deployments, StatefulSets, and more) whenever referenced `Secrets` or `ConfigMaps` are updated. +Reloader is a Kubernetes controller that automatically triggers rollouts of workloads (like Deployments, StatefulSets, and more) whenever referenced `Secrets`, `ConfigMaps`, or **optionally CSI-mounted secrets** are updated. In a traditional Kubernetes setup, updating a `Secret` or `ConfigMap` does not automatically restart or redeploy your workloads. This can lead to stale configurations running in production, especially when dealing with dynamic values like credentials, feature flags, or environment configs. @@ -169,9 +169,11 @@ metadata: This instructs Reloader to skip all reload logic for that resource across all workloads. -### 4. ⚙️ Workload-Specific Rollout Strategy +### 4. ⚙️ Workload-Specific Rollout Strategy (Argo Rollouts Only) -By default, Reloader uses the **rollout** strategy — it updates the pod template to trigger a new rollout. This works well in most cases, but it can cause problems if you're using GitOps tools like ArgoCD, which detect this as configuration drift. +Note: This is only applicable when using [Argo Rollouts](https://argoproj.github.io/argo-rollouts/). It is ignored for standard Kubernetes `Deployments`, `StatefulSets`, or `DaemonSets`. To use this feature, Argo Rollouts support must be enabled in Reloader (for example via `--is-argo-rollouts=true`). + +By default, Reloader triggers the Argo Rollout controller to perform a standard rollout by updating the pod template. This works well in most cases; however, because this modifies the workload spec, GitOps tools like ArgoCD will detect it as "Configuration Drift" and mark your application as OutOfSync. To avoid that, you can switch to the **restart** strategy, which simply restarts the pod without changing the pod template. @@ -192,6 +194,8 @@ metadata: 1. You want a quick restart without changing the workload spec 1. Your platform restricts metadata changes +This setting affects Argo Rollouts behavior, not Argo CD sync settings. + ### 5. ❗ Annotation Behavior Rules & Compatibility - `reloader.stakater.com/auto` and `reloader.stakater.com/search` **cannot be used together** — the `auto` annotation takes precedence. @@ -239,6 +243,61 @@ This feature allows you to pause rollouts for a deployment for a specified durat 1. ✅ Your deployment references multiple ConfigMaps or Secrets that may be updated at the same time. 1. ✅ You want to minimize unnecessary rollouts and reduce downtime caused by back-to-back configuration changes. +### 8. 🔐 CSI Secret Provider Support + +Reloader supports the [Secrets Store CSI Driver](https://secrets-store-csi-driver.sigs.k8s.io/), which allows mounting secrets from external secret stores (like AWS Secrets Manager, Azure Key Vault, HashiCorp Vault) directly into pods. +Unlike Kubernetes Secret objects, CSI-mounted secrets do not always trigger native Kubernetes update events. Reloader solves this by watching CSI status resources and restarting affected workloads when mounted secret versions change. + +#### How it works + +When secret rotation is enabled, the Secrets Store CSI Driver updates a Kubernetes resource called `SecretProviderClassPodStatus`. + +This resource reflects the currently mounted secret versions for a pod. +Reloader watches these updates and triggers a rollout when a change is detected. 
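+For example, once rotation is active you can inspect these status objects directly and see the same version data Reloader compares. This is an illustrative check: the `test` namespace matches the example below, and the `.status.objects` field layout follows the CSI driver's status API:
+
+```bash
+# List the per-pod status objects maintained by the CSI driver
+kubectl get secretproviderclasspodstatuses -A
+
+# Show the mounted object versions; Reloader triggers a reload when a
+# version recorded under .status.objects changes
+kubectl get secretproviderclasspodstatuses -n test \
+  -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.status.objects}{"\n"}{end}'
+```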
+ +#### Prerequisites + +- Secrets Store CSI Driver must be installed in your cluster +- Secret rotation must be enabled in the CSI driver +- CSI integration must be enabled in Reloader: `--enable-csi-integration=true` + +#### Annotations for CSI-mounted Secrets + +| Annotation | Description | +|------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------| +| `reloader.stakater.com/auto: "true"` | Global Discovery: Automatically discovers and reloads the workload when any mounted ConfigMap or Secret is updated. | +| `secretproviderclass.reloader.stakater.com/auto: 'true'` | CSI Discovery: Specifically watches for updates to all SecretProviderClasses used by the workload (CSI driver integration). | +| `secretproviderclass.reloader.stakater.com/reload: "my-secretproviderclass"` | Targeted Reload: Only reloads the workload when the specifically named SecretProviderClass(es) are updated. | + +Reloader monitors changes at the **per-secret level** by watching the `SecretProviderClassPodStatus`. Make sure each secret you want to monitor is properly defined with a `secretKey` in your `SecretProviderClass`: + +```yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: vault-reloader-demo + namespace: test +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc:8200" + vaultSkipTLSVerify: "true" + roleName: "demo-role" + objects: | + - objectName: "password" + secretPath: "secret/data/reloader-demo" + secretKey: "password" +``` + +***Important***: Reloader tracks changes to individual secrets (identified by `secretKey`). If your SecretProviderClass doesn't specify a `secretKey` for each object, Reloader may not detect updates correctly. + +#### Notes & Limitations + +- Reloader reacts to CSI status changes, not direct updates to external secret stores +- Secret rotation must be enabled in the CSI driver for updates to be detected +- CSI limitations (such as `subPath` mounts) still apply and may require pod restarts +- If secrets are synced to Kubernetes Secret objects, standard Reloader behavior applies and CSI support may not be required + ## 🚀 Installation ### 1. 📦 Helm @@ -430,7 +489,7 @@ PRs are welcome. In general, we follow the "fork-and-pull" Git workflow: ## Release Processes -_Repository GitHub releases_: As requested by the community in [issue 685](https://github.com/stakater/Reloader/issues/685), Reloader is now based on a manual release process. Releases are no longer done on every merged PR to the main branch, but manually on request. +*Repository GitHub releases*: As requested by the community in [issue 685](https://github.com/stakater/Reloader/issues/685), Reloader is now based on a manual release process. Releases are no longer done on every merged PR to the main branch, but manually on request. To make a GitHub release: @@ -443,7 +502,7 @@ To make a GitHub release: 1. Code owners create another branch from `master` and bump the helm chart version as well as Reloader image version. - Code owners create a PR with `release/helm-chart` label, example: [PR-846](https://github.com/stakater/Reloader/pull/846) -_Repository git tagging_: Push to the main branch will create a merge-image and merge-tag named `merge-${{ github.event.number }}`, for example `merge-800` when pull request number 800 is merged. 
+*Repository git tagging*: Push to the main branch will create a merge-image and merge-tag named `merge-${{ github.event.number }}`, for example `merge-800` when pull request number 800 is merged. ## Changelog diff --git a/deployments/kubernetes/chart/reloader/templates/clusterrole.yaml b/deployments/kubernetes/chart/reloader/templates/clusterrole.yaml index 9f655aa..bd14dfe 100644 --- a/deployments/kubernetes/chart/reloader/templates/clusterrole.yaml +++ b/deployments/kubernetes/chart/reloader/templates/clusterrole.yaml @@ -105,6 +105,17 @@ rules: - create - get - update +{{- end}} +{{- if .Values.reloader.enableCSIIntegration }} + - apiGroups: + - "secrets-store.csi.x-k8s.io" + resources: + - secretproviderclasspodstatuses + - secretproviderclasses + verbs: + - list + - get + - watch {{- end}} - apiGroups: - "" diff --git a/deployments/kubernetes/chart/reloader/templates/deployment.yaml b/deployments/kubernetes/chart/reloader/templates/deployment.yaml index 16564b2..e568f9f 100644 --- a/deployments/kubernetes/chart/reloader/templates/deployment.yaml +++ b/deployments/kubernetes/chart/reloader/templates/deployment.yaml @@ -210,7 +210,7 @@ spec: {{- . | toYaml | nindent 10 }} {{- end }} {{- end }} - {{- if or (.Values.reloader.logFormat) (.Values.reloader.logLevel) (.Values.reloader.ignoreSecrets) (.Values.reloader.ignoreNamespaces) (include "reloader-namespaceSelector" .) (.Values.reloader.resourceLabelSelector) (.Values.reloader.ignoreConfigMaps) (.Values.reloader.custom_annotations) (eq .Values.reloader.isArgoRollouts true) (eq .Values.reloader.reloadOnCreate true) (eq .Values.reloader.reloadOnDelete true) (ne .Values.reloader.reloadStrategy "default") (.Values.reloader.enableHA) (.Values.reloader.autoReloadAll) (.Values.reloader.ignoreJobs) (.Values.reloader.ignoreCronJobs)}} + {{- if or (.Values.reloader.logFormat) (.Values.reloader.logLevel) (.Values.reloader.ignoreSecrets) (.Values.reloader.ignoreNamespaces) (include "reloader-namespaceSelector" .) 
(.Values.reloader.resourceLabelSelector) (.Values.reloader.ignoreConfigMaps) (.Values.reloader.custom_annotations) (eq .Values.reloader.isArgoRollouts true) (eq .Values.reloader.reloadOnCreate true) (eq .Values.reloader.reloadOnDelete true) (ne .Values.reloader.reloadStrategy "default") (.Values.reloader.enableHA) (.Values.reloader.autoReloadAll) (.Values.reloader.ignoreJobs) (.Values.reloader.ignoreCronJobs) (.Values.reloader.enableCSIIntegration)}} args: {{- if .Values.reloader.logFormat }} - "--log-format={{ .Values.reloader.logFormat }}" @@ -246,6 +246,9 @@ spec: - "--pprof-addr={{ .Values.reloader.pprofAddr }}" {{- end }} {{- end }} + {{- if .Values.reloader.enableCSIIntegration }} + - "--enable-csi-integration=true" + {{- end }} {{- if .Values.reloader.custom_annotations }} {{- if .Values.reloader.custom_annotations.configmap }} - "--configmap-annotation" diff --git a/deployments/kubernetes/chart/reloader/templates/role.yaml b/deployments/kubernetes/chart/reloader/templates/role.yaml index 70a6815..7355d87 100644 --- a/deployments/kubernetes/chart/reloader/templates/role.yaml +++ b/deployments/kubernetes/chart/reloader/templates/role.yaml @@ -92,6 +92,17 @@ rules: - create - get - update +{{- end}} +{{- if .Values.reloader.enableCSIIntegration }} + - apiGroups: + - "secrets-store.csi.x-k8s.io" + resources: + - secretproviderclasspodstatuses + - secretproviderclasses + verbs: + - list + - get + - watch {{- end}} - apiGroups: - "" diff --git a/deployments/kubernetes/chart/reloader/values.yaml b/deployments/kubernetes/chart/reloader/values.yaml index c9a46a0..a607491 100644 --- a/deployments/kubernetes/chart/reloader/values.yaml +++ b/deployments/kubernetes/chart/reloader/values.yaml @@ -49,6 +49,7 @@ reloader: enableHA: false # Set to true to enable pprof for profiling enablePProf: false + enableCSIIntegration: false # Address to start pprof server on. Default is ":6060" pprofAddr: ":6060" # Set to true if you have a pod security policy that enforces readOnlyRootFilesystem diff --git a/docs/Reloader-vs-ConfigmapController.md b/docs/Reloader-vs-ConfigmapController.md index f866f89..1433daa 100644 --- a/docs/Reloader-vs-ConfigmapController.md +++ b/docs/Reloader-vs-ConfigmapController.md @@ -2,10 +2,10 @@ Reloader is inspired from [`configmapcontroller`](https://github.com/fabric8io/configmapcontroller) but there are many ways in which it differs from `configmapcontroller`. Below is the small comparison between these two controllers. -| Reloader | ConfigMap | -|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Reloader can watch both `Secrets` and `ConfigMaps`. | `configmapcontroller` can only watch changes in `ConfigMaps`. It cannot detect changes in other resources like `Secrets`. | -| Reloader can perform rolling upgrades on `deployments` as well as on `statefulsets` and `daemonsets` | `configmapcontroller` can only perform rolling upgrades on `deployments`. 
It currently does not support rolling upgrades on `statefulsets` and `daemonsets` | -| Reloader provides both unit test cases and end to end integration test cases for future updates. So one can make sure that new changes do not break any old functionality. | Currently there are not any unit test cases or end to end integration test cases in `configmap-controller`. It add difficulties for any additional updates in `configmap-controller` and one can not know for sure whether new changes breaks any old functionality or not. | -| Reloader uses SHA1 to encode the change in `ConfigMap` or `Secret`. It then saves the SHA1 value in `STAKATER_FOO_CONFIGMAP` or `STAKATER_FOO_SECRET` environment variable depending upon where the change has happened. The use of SHA1 provides a concise 40 characters encoded value that is very less prone to collision. | `configmap-controller` uses `FABRICB_FOO_REVISION` environment variable to store any change in `ConfigMap` controller. It does not encode it or convert it in suitable hash value to avoid data pollution in deployment. | -| Reloader allows you to customize your own annotation (for both `Secrets` and `ConfigMaps`) using command line flags | `configmap-controller` restricts you to only their provided annotation | +| Reloader | ConfigMap Controller | +|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Reloader can watch both `Secrets` and `ConfigMaps`. | `configmapcontroller` can only watch changes in `ConfigMaps`. It cannot detect changes in other resources like `Secrets`. | +| Reloader can perform rolling upgrades on `deployments` as well as on `statefulsets` and `daemonsets`. | `configmapcontroller` can only perform rolling upgrades on `deployments`. It currently does not support rolling upgrades on `statefulsets` and `daemonsets`. | +| Reloader provides both unit test cases and end to end integration test cases for future updates. So one can make sure that new changes do not break any old functionality. | Currently there are not any unit test cases or end to end integration test cases in `configmap-controller`. It adds difficulties for any additional updates in `configmap-controller`, and one cannot know for sure whether new changes break any old functionality. | +| Reloader uses SHA1 to encode the change in `ConfigMap` or `Secret`. It then saves the SHA1 value in the `STAKATER_FOO_CONFIGMAP` or `STAKATER_FOO_SECRET` environment variable, depending upon where the change has happened. The use of SHA1 provides a concise 40-character encoded value that is far less prone to collisions. | `configmap-controller` uses the `FABRICB_FOO_REVISION` environment variable to store any change in the `ConfigMap` controller. It does not encode it or convert it into a suitable hash value to avoid data pollution in the deployment. 
| +| Reloader allows you to customize your own annotation (for both `Secrets` and `ConfigMaps`) using command line flags | `configmap-controller` restricts you to only their provided annotation | diff --git a/docs/Reloader-vs-k8s-trigger-controller.md b/docs/Reloader-vs-k8s-trigger-controller.md index 811987a..561dca5 100644 --- a/docs/Reloader-vs-k8s-trigger-controller.md +++ b/docs/Reloader-vs-k8s-trigger-controller.md @@ -6,7 +6,7 @@ Reloader and k8s-trigger-controller are both built for same purpose. So there ar - Both controllers support change detection in `ConfigMaps` and `Secrets` - Both controllers support deployment `rollout` -- Both controllers use SHA1 for hashing +- The Reloader controller uses SHA1 for hashing - Both controllers have end to end as well as unit test cases. ## Differences diff --git a/go.mod b/go.mod index 5417a61..ab3607b 100644 --- a/go.mod +++ b/go.mod @@ -4,82 +4,86 @@ go 1.25.5 require ( github.com/argoproj/argo-rollouts v1.8.3 - github.com/onsi/ginkgo/v2 v2.21.0 - github.com/onsi/gomega v1.35.1 - github.com/openshift/client-go v0.0.0-20250402181141-b3bad3b645f2 + github.com/onsi/ginkgo/v2 v2.27.2 + github.com/onsi/gomega v1.38.2 + github.com/openshift/api v0.0.0-20260102143802-d2ec16864f86 + github.com/openshift/client-go v0.0.0-20251223102348-558b0eef16bc github.com/parnurzeal/gorequest v0.3.0 - github.com/prometheus/client_golang v1.22.0 + github.com/prometheus/client_golang v1.23.2 github.com/sirupsen/logrus v1.9.3 - github.com/spf13/cobra v1.10.1 - github.com/stretchr/testify v1.10.0 - k8s.io/api v0.32.3 - k8s.io/apimachinery v0.32.3 - k8s.io/client-go v0.32.3 - k8s.io/kubectl v0.32.3 - k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 + github.com/spf13/cobra v1.10.2 + github.com/stretchr/testify v1.11.1 + k8s.io/api v0.35.0 + k8s.io/apimachinery v0.35.0 + k8s.io/client-go v0.35.0 + k8s.io/kubectl v0.35.0 + k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2 + sigs.k8s.io/secrets-store-csi-driver v1.5.5 ) require ( + github.com/Masterminds/semver/v3 v3.4.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/elazarl/goproxy v0.0.0-20240726154733-8b0c20506380 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect - github.com/fxamacker/cbor/v2 v2.8.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect github.com/go-openapi/jsonpointer v0.21.1 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.4 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + 
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/moul/http2curl v1.0.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/openshift/api v0.0.0-20250411135543-10a8fa583797 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.63.0 // indirect - github.com/prometheus/procfs v0.16.0 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/smartystreets/goconvey v1.7.2 // indirect github.com/spf13/pflag v1.0.9 // indirect github.com/x448/float16 v0.8.4 // indirect - golang.org/x/net v0.39.0 // indirect - golang.org/x/oauth2 v0.29.0 // indirect - golang.org/x/sys v0.32.0 // indirect - golang.org/x/term v0.31.0 // indirect - golang.org/x/text v0.24.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/mod v0.30.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/term v0.38.0 // indirect + golang.org/x/text v0.32.0 // indirect golang.org/x/time v0.11.0 // indirect - golang.org/x/tools v0.26.0 // indirect - google.golang.org/protobuf v1.36.6 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + golang.org/x/tools v0.39.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect - sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) // Replacements for argo-rollouts replace ( github.com/go-check/check => github.com/go-check/check v0.0.0-20201130134442-10cb98267c6c - k8s.io/api v0.0.0 => k8s.io/api v0.32.3 - k8s.io/apimachinery v0.0.0 => k8s.io/apimachinery v0.32.3 - k8s.io/client-go v0.0.0 => k8s.io/client-go v0.32.3 + k8s.io/api v0.0.0 => k8s.io/api v0.35.0 + k8s.io/apimachinery v0.0.0 => k8s.io/apimachinery v0.35.0 + k8s.io/client-go v0.0.0 => k8s.io/client-go v0.35.0 k8s.io/cloud-provider v0.0.0 => k8s.io/cloud-provider v0.24.2 k8s.io/controller-manager v0.0.0 => k8s.io/controller-manager v0.24.2 k8s.io/cri-api v0.0.0 => k8s.io/cri-api v0.20.5-rc.0 @@ -88,7 +92,7 @@ replace ( k8s.io/kube-controller-manager v0.0.0 => k8s.io/kube-controller-manager v0.24.2 k8s.io/kube-proxy v0.0.0 => k8s.io/kube-proxy v0.24.2 k8s.io/kube-scheduler v0.0.0 => k8s.io/kube-scheduler v0.24.2 - k8s.io/kubectl v0.0.0 => k8s.io/kubectl v0.32.3 + k8s.io/kubectl v0.0.0 => k8s.io/kubectl v0.35.0 k8s.io/kubelet v0.0.0 => k8s.io/kubelet v0.24.2 k8s.io/legacy-cloud-providers v0.0.0 => k8s.io/legacy-cloud-providers v0.24.2 k8s.io/mount-utils v0.0.0 => k8s.io/mount-utils v0.20.5-rc.0 diff --git a/go.sum b/go.sum index 59339ea..50dd7d0 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ 
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/argoproj/argo-rollouts v1.8.3 h1:blbtQva4IK9r6gFh+dWkCrLnFdPOWiv9ubQYu36qeaA= github.com/argoproj/argo-rollouts v1.8.3/go.mod h1:kCAUvIfMGfOyVf3lvQbBt0nqQn4Pd+zB5/YwKv+UBa8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -13,10 +15,16 @@ github.com/elazarl/goproxy v0.0.0-20240726154733-8b0c20506380 h1:1NyRx2f4W4WBRyg github.com/elazarl/goproxy v0.0.0-20240726154733-8b0c20506380/go.mod h1:thX175TtLTzLj3p7N/Q9IiKZ7NF+p72cvL91emV0hzo= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/fxamacker/cbor/v2 v2.8.0 h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU= -github.com/fxamacker/cbor/v2 v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= +github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= +github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= +github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= +github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= +github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-openapi/jsonpointer v0.21.1 h1:whnzv/pNXtK2FbX/W9yJfRmE2gsmkfahjMKB0fZvcic= github.com/go-openapi/jsonpointer v0.21.1/go.mod h1:50I1STOfbY1ycR8jGz8DaMeLCdXiI6aDteEdRNNzpdk= github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= @@ -25,20 +33,17 @@ github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZ github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 
+github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= -github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= @@ -47,6 +52,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= +github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= @@ -63,23 +70,28 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= +github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= +github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= +github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= +github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 
v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= -github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= -github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= -github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= -github.com/openshift/api v0.0.0-20250411135543-10a8fa583797 h1:8x3G8QOZqo2bRAL8JFlPz/odqQECI/XmlZeRwnFxJ8I= -github.com/openshift/api v0.0.0-20250411135543-10a8fa583797/go.mod h1:yk60tHAmHhtVpJQo3TwVYq2zpuP70iJIFDCmeKMIzPw= -github.com/openshift/client-go v0.0.0-20250402181141-b3bad3b645f2 h1:bPXR0R8zp1o12nSUphN26hSM+OKYq5pMorbDCpApzDQ= -github.com/openshift/client-go v0.0.0-20250402181141-b3bad3b645f2/go.mod h1:dT1cJyVTperQ53GvVRa+GZ27r02fDZy2k5j+9QoQsCo= +github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= +github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= +github.com/openshift/api v0.0.0-20260102143802-d2ec16864f86 h1:Vsqg+WqSA91LjrwK5lzkSCjztK/B+T8MPKI3MIALx3w= +github.com/openshift/api v0.0.0-20260102143802-d2ec16864f86/go.mod h1:d5uzF0YN2nQQFA0jIEWzzOZ+edmo6wzlGLvx5Fhz4uY= +github.com/openshift/client-go v0.0.0-20251223102348-558b0eef16bc h1:nIlRaJfr/yGjPV15MNF5eVHLAGyXFjcUzO+hXeWDDk8= +github.com/openshift/client-go v0.0.0-20251223102348-558b0eef16bc/go.mod h1:cs9BwTu96sm2vQvy7r9rOiltgu90M6ju2qIHFG9WU+o= github.com/parnurzeal/gorequest v0.3.0 h1:SoFyqCDC9COr1xuS6VA8fC8RU7XyrJZN2ona1kEX7FI= github.com/parnurzeal/gorequest v0.3.0/go.mod h1:3Kh2QUMJoqw3icWAecsyzkpY7UzRfDhbRdTjtNwNiUE= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -87,16 +99,16 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k= -github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= 
-github.com/prometheus/procfs v0.16.0 h1:xh6oHhKwnOJKMYiYBDWmkHqQPyiY40sny36Cmx2bbsM= -github.com/prometheus/procfs v0.16.0/go.mod h1:8veyXUu3nGP7oaCxhX6yeaM5u4stL2FeMXnCqhDthZg= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -104,50 +116,68 @@ github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg= github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM= -github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= -github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= +github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= github.com/x448/float16 
v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= -golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= -golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= -golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.32.0 
h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o= -golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= +golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= -golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -155,44 +185,45 @@ golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= -golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod 
h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls= -k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k= -k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U= -k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= -k8s.io/client-go v0.32.3 h1:RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU= -k8s.io/client-go v0.32.3/go.mod h1:3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY= +k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY= +k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA= +k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8= +k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= +k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE= +k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/kubectl v0.32.3 h1:VMi584rbboso+yjfv0d8uBHwwxbC438LKq+dXd5tOAI= -k8s.io/kubectl v0.32.3/go.mod h1:6Euv2aso5GKzo/UVMacV6C7miuyevpfI91SvBvV9Zdg= -k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= -k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= -sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/kubectl v0.35.0 h1:cL/wJKHDe8E8+rP3G7avnymcMg6bH6JEcR5w5uo06wc= +k8s.io/kubectl v0.35.0/go.mod h1:VR5/TSkYyxZwrRwY5I5dDq6l5KXmiCb+9w8IKplk3Qo= +k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2 h1:OfgiEo21hGiwx1oJUU5MpEaeOEg6coWndBkZF/lkFuE= +k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= 
-sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/secrets-store-csi-driver v1.5.5 h1:LJDpDL5TILhlP68nGvtGSlJFxSDgAD2m148NT0Ts7os= +sigs.k8s.io/secrets-store-csi-driver v1.5.5/go.mod h1:i2WqLicYH00hrTG3JAzICPMF4HL4KMEORlDt9UQoZLk= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/pkg/callbacks/rolling_upgrade_test.go b/internal/pkg/callbacks/rolling_upgrade_test.go index 452867f..75583de 100644 --- a/internal/pkg/callbacks/rolling_upgrade_test.go +++ b/internal/pkg/callbacks/rolling_upgrade_test.go @@ -49,7 +49,7 @@ func newTestFixtures() testFixtures { func setupTestClients() kube.Clients { return kube.Clients{ - KubernetesClient: fake.NewSimpleClientset(), + KubernetesClient: fake.NewClientset(), ArgoRolloutClient: fakeargoclientset.NewSimpleClientset(), } } diff --git a/internal/pkg/cmd/reloader.go b/internal/pkg/cmd/reloader.go index f54d757..771e2df 100644 --- a/internal/pkg/cmd/reloader.go +++ b/internal/pkg/cmd/reloader.go @@ -160,6 +160,10 @@ func startReloader(cmd *cobra.Command, args []string) { var controllers []*controller.Controller for k := range kube.ResourceMap { + if k == constants.SecretProviderClassController && !shouldRunCSIController() { + continue + } + if ignoredResourcesList.Contains(k) || (len(namespaceLabelSelector) == 0 && k == "namespaces") { continue } @@ -207,3 +211,15 @@ func startPProfServer() { logrus.Errorf("Failed to start pprof server: %v", err) } } + +func shouldRunCSIController() bool { + if !options.EnableCSIIntegration { + logrus.Info("Skipping secretproviderclasspodstatuses controller: EnableCSIIntegration is disabled") + return false + } + if !kube.IsCSIInstalled { + logrus.Info("Skipping secretproviderclasspodstatuses controller: CSI CRDs not installed") + return false + } + return true +} diff --git a/internal/pkg/constants/constants.go b/internal/pkg/constants/constants.go index 18d1cc7..8025a29 100644 --- a/internal/pkg/constants/constants.go +++ b/internal/pkg/constants/constants.go @@ -8,6 +8,8 @@ const ( ConfigmapEnvVarPostfix = "CONFIGMAP" // SecretEnvVarPostfix is a postfix for secret envVar SecretEnvVarPostfix = "SECRET" + // SecretProviderClassEnvVarPostfix is a postfix for secretproviderclasspodstatus envVar + SecretProviderClassEnvVarPostfix = "SECRETPROVIDERCLASS" // EnvVarPrefix is a Prefix for environment variable EnvVarPrefix = "STAKATER_" @@ -22,6 +24,8 @@ const ( EnvVarsReloadStrategy = "env-vars" // AnnotationsReloadStrategy instructs Reloader to add pod template annotations to facilitate a restart AnnotationsReloadStrategy = "annotations" + // SecretProviderClassController enables support for SecretProviderClassPodStatus resources + SecretProviderClassController = "secretproviderclasspodstatuses" ) // Leadership election related consts diff --git a/internal/pkg/controller/controller.go b/internal/pkg/controller/controller.go index 15b2e0f..1a51d9a 100644 --- a/internal/pkg/controller/controller.go +++ b/internal/pkg/controller/controller.go @@ -2,9 +2,11 @@ package controller import ( "fmt" + "slices" "time" "github.com/sirupsen/logrus" + "github.com/stakater/Reloader/internal/pkg/constants" 
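+	// constants supplies the secretproviderclasspodstatuses resource name that getClientForResource switches on further down in this file.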
"github.com/stakater/Reloader/internal/pkg/handler" "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" @@ -21,7 +23,7 @@ import ( "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" "k8s.io/kubectl/pkg/scheme" - "k8s.io/utils/strings/slices" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" ) // Controller for checking events @@ -79,7 +81,12 @@ func NewController( } } - listWatcher := cache.NewFilteredListWatchFromClient(client.CoreV1().RESTClient(), resource, namespace, optionsModifier) + getterRESTClient, err := getClientForResource(resource, client) + if err != nil { + return nil, fmt.Errorf("failed to initialize REST client for %s: %w", resource, err) + } + + listWatcher := cache.NewFilteredListWatchFromClient(getterRESTClient, resource, namespace, optionsModifier) _, informer := cache.NewInformerWithOptions(cache.InformerOptions{ ListerWatcher: listWatcher, @@ -103,30 +110,38 @@ func NewController( // Add function to add a new object to the queue in case of creating a resource func (c *Controller) Add(obj interface{}) { + c.collectors.RecordEventReceived("add", c.resource) switch object := obj.(type) { case *v1.Namespace: c.addSelectedNamespaceToCache(*object) return + case *csiv1.SecretProviderClassPodStatus: + return } if options.ReloadOnCreate == "true" { if !c.resourceInIgnoredNamespace(obj) && c.resourceInSelectedNamespaces(obj) && secretControllerInitialized && configmapControllerInitialized { - c.queue.Add(handler.ResourceCreatedHandler{ - Resource: obj, - Collectors: c.collectors, - Recorder: c.recorder, + c.enqueue(handler.ResourceCreatedHandler{ + Resource: obj, + Collectors: c.collectors, + Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } } func (c *Controller) resourceInIgnoredNamespace(raw interface{}) bool { - switch object := raw.(type) { + switch obj := raw.(type) { case *v1.ConfigMap: - return c.ignoredNamespaces.Contains(object.Namespace) + return c.ignoredNamespaces.Contains(obj.Namespace) case *v1.Secret: - return c.ignoredNamespaces.Contains(object.Namespace) + return c.ignoredNamespaces.Contains(obj.Namespace) + case *csiv1.SecretProviderClassPodStatus: + return c.ignoredNamespaces.Contains(obj.Namespace) } return false } @@ -145,6 +160,10 @@ func (c *Controller) resourceInSelectedNamespaces(raw interface{}) bool { if slices.Contains(selectedNamespacesCache, object.GetNamespace()) { return true } + case *csiv1.SecretProviderClassPodStatus: + if slices.Contains(selectedNamespacesCache, object.GetNamespace()) { + return true + } } return false } @@ -166,31 +185,44 @@ func (c *Controller) removeSelectedNamespaceFromCache(namespace v1.Namespace) { // Update function to add an old object and a new object to the queue in case of updating a resource func (c *Controller) Update(old interface{}, new interface{}) { + c.collectors.RecordEventReceived("update", c.resource) + switch new.(type) { case *v1.Namespace: return } if !c.resourceInIgnoredNamespace(new) && c.resourceInSelectedNamespaces(new) { - c.queue.Add(handler.ResourceUpdatedHandler{ + c.enqueue(handler.ResourceUpdatedHandler{ Resource: new, OldResource: old, Collectors: c.collectors, Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } // Delete function to add an object to the queue in case of deleting a resource func (c *Controller) Delete(old interface{}) { + 
c.collectors.RecordEventReceived("delete", c.resource) + + if _, ok := old.(*csiv1.SecretProviderClassPodStatus); ok { + return + } if options.ReloadOnDelete == "true" { if !c.resourceInIgnoredNamespace(old) && c.resourceInSelectedNamespaces(old) && secretControllerInitialized && configmapControllerInitialized { - c.queue.Add(handler.ResourceDeleteHandler{ - Resource: old, - Collectors: c.collectors, - Recorder: c.recorder, + c.enqueue(handler.ResourceDeleteHandler{ + Resource: old, + Collectors: c.collectors, + Recorder: c.recorder, + EnqueueTime: time.Now(), }) + } else { + c.collectors.RecordSkipped("ignored_or_not_selected") } } @@ -201,6 +233,13 @@ func (c *Controller) Delete(old interface{}) { } } +// enqueue adds an item to the queue and records metrics +func (c *Controller) enqueue(item interface{}) { + c.queue.Add(item) + c.collectors.RecordQueueAdd() + c.collectors.SetQueueDepth(c.queue.Len()) +} + // Run function for controller which handles the queue func (c *Controller) Run(threadiness int, stopCh chan struct{}) { defer runtime.HandleCrash() @@ -242,13 +281,34 @@ func (c *Controller) processNextItem() bool { if quit { return false } + + c.collectors.SetQueueDepth(c.queue.Len()) + // Tell the queue that we are done with processing this key. This unblocks the key for other workers // This allows safe parallel processing because two events with the same key are never processed in // parallel. defer c.queue.Done(resourceHandler) + // Record queue latency if the handler supports it + if h, ok := resourceHandler.(handler.TimedHandler); ok { + queueLatency := time.Since(h.GetEnqueueTime()) + c.collectors.RecordQueueLatency(queueLatency) + } + + // Track reconcile/handler duration + startTime := time.Now() + // Invoke the method containing the business logic err := resourceHandler.(handler.ResourceHandler).Handle() + + duration := time.Since(startTime) + + if err != nil { + c.collectors.RecordReconcile("error", duration) + } else { + c.collectors.RecordReconcile("success", duration) + } + // Handle the error if something went wrong during the execution of the business logic c.handleErr(err, resourceHandler) return true @@ -261,16 +321,26 @@ func (c *Controller) handleErr(err error, key interface{}) { // This ensures that future processing of updates for this key is not delayed because of // an outdated error history. c.queue.Forget(key) + + // Record successful event processing + c.collectors.RecordEventProcessed("unknown", c.resource, "success") return } + // Record error + c.collectors.RecordError("handler_error") + // This controller retries 5 times if something goes wrong. After that, it stops trying. if c.queue.NumRequeues(key) < 5 { logrus.Errorf("Error syncing events: %v", err) + // Record retry + c.collectors.RecordRetry() + // Re-enqueue the key rate limited. Based on the rate limiter on the // queue and the re-enqueue history, the key will be processed later again. 
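+	// Assuming the queue was built with client-go's default controller rate limiter, the per-key delay grows with every requeue, so a persistently failing key backs off instead of hot-looping.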
c.queue.AddRateLimited(key) + c.collectors.SetQueueDepth(c.queue.Len()) return } @@ -279,4 +349,17 @@ func (c *Controller) handleErr(err error, key interface{}) { runtime.HandleError(err) logrus.Errorf("Dropping key out of the queue: %v", err) logrus.Debugf("Dropping the key %q out of the queue: %v", key, err) + + c.collectors.RecordEventProcessed("unknown", c.resource, "dropped") +} + +func getClientForResource(resource string, coreClient kubernetes.Interface) (cache.Getter, error) { + if resource == constants.SecretProviderClassController { + csiClient, err := kube.GetCSIClient() + if err != nil { + return nil, fmt.Errorf("failed to get CSI client: %w", err) + } + return csiClient.SecretsstoreV1().RESTClient(), nil + } + return coreClient.CoreV1().RESTClient(), nil } diff --git a/internal/pkg/crypto/sha_test.go b/internal/pkg/crypto/sha_test.go index 60d5af6..5cb0afc 100644 --- a/internal/pkg/crypto/sha_test.go +++ b/internal/pkg/crypto/sha_test.go @@ -13,3 +13,16 @@ func TestGenerateSHA(t *testing.T) { t.Errorf("Failed to generate SHA") } } + +// TestGenerateSHAEmptyString verifies that empty string generates a valid hash +// This ensures consistent behavior and avoids issues with string matching operations +func TestGenerateSHAEmptyString(t *testing.T) { + result := GenerateSHA("") + expected := "da39a3ee5e6b4b0d3255bfef95601890afd80709" + if result != expected { + t.Errorf("Failed to generate SHA for empty string. Expected: %s, Got: %s", expected, result) + } + if len(result) != 40 { + t.Errorf("SHA hash should be 40 characters long, got %d", len(result)) + } +} diff --git a/internal/pkg/handler/create.go b/internal/pkg/handler/create.go index fab7378..d676610 100644 --- a/internal/pkg/handler/create.go +++ b/internal/pkg/handler/create.go @@ -1,6 +1,8 @@ package handler import ( + "time" + "github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" @@ -11,25 +13,46 @@ import ( // ResourceCreatedHandler contains new objects type ResourceCreatedHandler struct { - Resource interface{} - Collectors metrics.Collectors - Recorder record.EventRecorder + Resource interface{} + Collectors metrics.Collectors + Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceCreatedHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes the newly created resource func (r ResourceCreatedHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil { logrus.Errorf("Resource creation handler received nil resource") - } else { - config, _ := r.GetConfig() - // Send webhook - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + return nil } - return nil + + config, _ := r.GetConfig() + // Send webhook + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + if err == nil { + result = "success" + } + return err } // GetConfig gets configurations containing SHA, annotations, namespace and resource 
name diff --git a/internal/pkg/handler/delete.go b/internal/pkg/handler/delete.go index 65c671e..34e032b 100644 --- a/internal/pkg/handler/delete.go +++ b/internal/pkg/handler/delete.go @@ -3,6 +3,7 @@ package handler import ( "fmt" "slices" + "time" "github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/callbacks" @@ -20,25 +21,46 @@ import ( // ResourceDeleteHandler contains new objects type ResourceDeleteHandler struct { - Resource interface{} - Collectors metrics.Collectors - Recorder record.EventRecorder + Resource interface{} + Collectors metrics.Collectors + Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceDeleteHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes resources being deleted func (r ResourceDeleteHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil { logrus.Errorf("Resource delete handler received nil resource") - } else { - config, _ := r.GetConfig() - // Send webhook - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy) + return nil } - return nil + + config, _ := r.GetConfig() + // Send webhook + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy) + if err == nil { + result = "success" + } + return err } // GetConfig gets configurations containing SHA, annotations, namespace and resource name diff --git a/internal/pkg/handler/handler.go b/internal/pkg/handler/handler.go index 1f5858e..9018f80 100644 --- a/internal/pkg/handler/handler.go +++ b/internal/pkg/handler/handler.go @@ -1,9 +1,18 @@ package handler -import "github.com/stakater/Reloader/pkg/common" +import ( + "time" + + "github.com/stakater/Reloader/pkg/common" +) // ResourceHandler handles the creation and update of resources type ResourceHandler interface { Handle() error GetConfig() (common.Config, string) } + +// TimedHandler is a handler that tracks when it was enqueued +type TimedHandler interface { + GetEnqueueTime() time.Time +} diff --git a/internal/pkg/handler/pause_deployment_test.go b/internal/pkg/handler/pause_deployment_test.go index c14cbfc..19e7ac6 100644 --- a/internal/pkg/handler/pause_deployment_test.go +++ b/internal/pkg/handler/pause_deployment_test.go @@ -244,7 +244,7 @@ func TestHandleMissingTimerSimple(t *testing.T) { }() t.Run(test.name, func(t *testing.T) { - fakeClient := testclient.NewSimpleClientset() + fakeClient := testclient.NewClientset() clients := kube.Clients{ KubernetesClient: fakeClient, } @@ -337,7 +337,7 @@ func TestPauseDeployment(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - fakeClient := testclient.NewSimpleClientset() + fakeClient := testclient.NewClientset() clients := kube.Clients{ KubernetesClient: fakeClient, } diff --git a/internal/pkg/handler/update.go b/internal/pkg/handler/update.go index ae0bb1e..3fde98e 100644 --- a/internal/pkg/handler/update.go +++ b/internal/pkg/handler/update.go @@ -1,6 +1,8 @@ package handler import ( + "time" + 
"github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" @@ -8,6 +10,7 @@ import ( "github.com/stakater/Reloader/pkg/common" v1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/record" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" ) // ResourceUpdatedHandler contains updated objects @@ -16,38 +19,79 @@ type ResourceUpdatedHandler struct { OldResource interface{} Collectors metrics.Collectors Recorder record.EventRecorder + EnqueueTime time.Time // Time when this handler was added to the queue +} + +// GetEnqueueTime returns when this handler was enqueued +func (r ResourceUpdatedHandler) GetEnqueueTime() time.Time { + return r.EnqueueTime } // Handle processes the updated resource func (r ResourceUpdatedHandler) Handle() error { + startTime := time.Now() + result := "error" + + defer func() { + r.Collectors.RecordReconcile(result, time.Since(startTime)) + }() + if r.Resource == nil || r.OldResource == nil { logrus.Errorf("Resource update handler received nil resource") - } else { - config, oldSHAData := r.GetConfig() - if config.SHAValue != oldSHAData { - // Send a webhook if update - if options.WebhookUrl != "" { - return sendUpgradeWebhook(config, options.WebhookUrl) - } - // process resource based on its type - return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) - } + return nil } + + config, oldSHAData := r.GetConfig() + if config.SHAValue != oldSHAData { + // Send a webhook if update + if options.WebhookUrl != "" { + err := sendUpgradeWebhook(config, options.WebhookUrl) + if err == nil { + result = "success" + } + return err + } + // process resource based on its type + err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy) + if err == nil { + result = "success" + } + return err + } + + // No data change - skip + result = "skipped" + r.Collectors.RecordSkipped("no_data_change") return nil } // GetConfig gets configurations containing SHA, annotations, namespace and resource name func (r ResourceUpdatedHandler) GetConfig() (common.Config, string) { - var oldSHAData string - var config common.Config - if _, ok := r.Resource.(*v1.ConfigMap); ok { - oldSHAData = util.GetSHAfromConfigmap(r.OldResource.(*v1.ConfigMap)) - config = common.GetConfigmapConfig(r.Resource.(*v1.ConfigMap)) - } else if _, ok := r.Resource.(*v1.Secret); ok { - oldSHAData = util.GetSHAfromSecret(r.OldResource.(*v1.Secret).Data) - config = common.GetSecretConfig(r.Resource.(*v1.Secret)) - } else { - logrus.Warnf("Invalid resource: Resource should be 'Secret' or 'Configmap' but found, %v", r.Resource) + var ( + oldSHAData string + config common.Config + ) + + switch res := r.Resource.(type) { + case *v1.ConfigMap: + if old, ok := r.OldResource.(*v1.ConfigMap); ok && old != nil { + oldSHAData = util.GetSHAfromConfigmap(old) + } + config = common.GetConfigmapConfig(res) + + case *v1.Secret: + if old, ok := r.OldResource.(*v1.Secret); ok && old != nil { + oldSHAData = util.GetSHAfromSecret(old.Data) + } + config = common.GetSecretConfig(res) + + case *csiv1.SecretProviderClassPodStatus: + if old, ok := r.OldResource.(*csiv1.SecretProviderClassPodStatus); ok && old != nil && old.Status.Objects != nil { + oldSHAData = util.GetSHAfromSecretProviderClassPodStatus(old.Status) + } + config = common.GetSecretProviderClassPodStatusConfig(res) + default: + logrus.Warnf("Invalid resource: Resource should be 'Secret', 'Configmap' or 'SecretProviderClassPodStatus' but found, %T", r.Resource) } return 
config, oldSHAData } diff --git a/internal/pkg/handler/upgrade.go b/internal/pkg/handler/upgrade.go index 6f185f1..982dbfa 100644 --- a/internal/pkg/handler/upgrade.go +++ b/internal/pkg/handler/upgrade.go @@ -2,11 +2,14 @@ package handler import ( "bytes" + "context" "encoding/json" "errors" "fmt" "io" "os" + "strings" + "time" "github.com/parnurzeal/gorequest" "github.com/prometheus/client_golang/prometheus" @@ -23,6 +26,7 @@ import ( v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" patchtypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" @@ -236,23 +240,35 @@ func rollingUpgrade(clients kube.Clients, config common.Config, upgradeFuncs cal func PerformAction(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy) error { items := upgradeFuncs.ItemsFunc(clients, config.Namespace) + // Record workloads scanned + collectors.RecordWorkloadsScanned(upgradeFuncs.ResourceType, len(items)) + + matchedCount := 0 for _, item := range items { - err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) error { + matched, err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) (bool, error) { return upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource) }) if err != nil { return err } + if matched { + matchedCount++ + } } + // Record workloads matched + collectors.RecordWorkloadsMatched(upgradeFuncs.ResourceType, matchedCount) + return nil } -func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error { +func retryOnConflict(backoff wait.Backoff, fn func(_ bool) (bool, error)) (bool, error) { var lastError error + var matched bool fetchResource := false // do not fetch resource on first attempt, already done by ItemsFunc err := wait.ExponentialBackoff(backoff, func() (bool, error) { - err := fn(fetchResource) + var err error + matched, err = fn(fetchResource) fetchResource = true switch { case err == nil: @@ -267,35 +283,42 @@ func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error { if wait.Interrupted(err) { err = lastError } - return err + return matched, err } -func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) error { +func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) (bool, error) { + actionStartTime := time.Now() + accessor, err := meta.Accessor(resource) if err != nil { - return err + return false, err } resourceName := accessor.GetName() if fetchResource { resource, err = upgradeFuncs.ItemFunc(clients, resourceName, config.Namespace) if err != nil { - return err + return false, err } } + if config.Type == constants.SecretProviderClassEnvVarPostfix { + populateAnnotationsFromSecretProviderClass(clients, &config) + } + annotations := upgradeFuncs.AnnotationsFunc(resource) podAnnotations := upgradeFuncs.PodAnnotationsFunc(resource) result := common.ShouldReload(config, upgradeFuncs.ResourceType, annotations, podAnnotations, 
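+		// the process-wide command-line options take part in the reload decision alongside the resource and pod template annotations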
common.GetCommandLineOptions()) if !result.ShouldReload { logrus.Debugf("No changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace) - return nil + return false, nil } strategyResult := strategy(upgradeFuncs, resource, config, result.AutoReload) if strategyResult.Result != constants.Updated { - return nil + collectors.RecordSkipped("strategy_not_updated") + return false, nil } // find correct annotation and update the resource @@ -309,7 +332,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca _, err = PauseDeployment(deployment, clients, config.Namespace, pauseInterval) if err != nil { logrus.Errorf("Failed to pause deployment '%s' in namespace '%s': %v", resourceName, config.Namespace, err) - return err + return true, err } } } @@ -320,16 +343,19 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca err = upgradeFuncs.UpdateFunc(clients, config.Namespace, resource) } + actionLatency := time.Since(actionStartTime) + if err != nil { message := fmt.Sprintf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err) logrus.Errorf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err) collectors.Reloaded.With(prometheus.Labels{"success": "false"}).Inc() collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "false", "namespace": config.Namespace}).Inc() + collectors.RecordAction(upgradeFuncs.ResourceType, "error", actionLatency) if recorder != nil { recorder.Event(resource, v1.EventTypeWarning, "ReloadFail", message) } - return err + return true, err } else { message := fmt.Sprintf("Changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace) message += fmt.Sprintf(", Updated '%s' of type '%s' in namespace '%s'", resourceName, upgradeFuncs.ResourceType, config.Namespace) @@ -338,6 +364,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca collectors.Reloaded.With(prometheus.Labels{"success": "true"}).Inc() collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "true", "namespace": config.Namespace}).Inc() + collectors.RecordAction(upgradeFuncs.ResourceType, "success", actionLatency) alert_on_reload, ok := os.LookupEnv("ALERT_ON_RELOAD") if recorder != nil { recorder.Event(resource, v1.EventTypeNormal, "Reloaded", message) @@ -350,7 +377,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca } } - return nil + return true, nil } func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string) string { @@ -380,6 +407,10 @@ func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string } } } + case constants.SecretProviderClassEnvVarPostfix: + if volumes[i].CSI != nil && volumes[i].CSI.VolumeAttributes["secretProviderClass"] == volumeName { + return volumes[i].Name + } } } @@ -516,6 +547,10 @@ func updatePodAnnotations(upgradeFuncs callbacks.RollingUpgradeFuncs, item runti return InvokeStrategyResult{constants.NotUpdated, nil} } + if config.Type == constants.SecretProviderClassEnvVarPostfix && secretProviderClassAnnotationReloaded(pa, config) { + return InvokeStrategyResult{constants.NotUpdated, nil} + } + for k, v := range annotations { pa[k] = v } @@ -523,6 +558,11 @@ func updatePodAnnotations(upgradeFuncs callbacks.RollingUpgradeFuncs, item runti return 
InvokeStrategyResult{constants.Updated, &Patch{Type: patchtypes.StrategicMergePatchType, Bytes: patch}} } +func secretProviderClassAnnotationReloaded(oldAnnotations map[string]string, newConfig common.Config) bool { + annotation := oldAnnotations[getReloaderAnnotationKey()] + return strings.Contains(annotation, newConfig.ResourceName) && strings.Contains(annotation, newConfig.SHAValue) +} + func getReloaderAnnotationKey() string { return fmt.Sprintf("%s/%s", constants.ReloaderAnnotationPrefix, @@ -573,6 +613,10 @@ func updateContainerEnvVars(upgradeFuncs callbacks.RollingUpgradeFuncs, item run return InvokeStrategyResult{constants.NoContainerFound, nil} } + if config.Type == constants.SecretProviderClassEnvVarPostfix && secretProviderClassEnvReloaded(upgradeFuncs.ContainersFunc(item), envVar, config.SHAValue) { + return InvokeStrategyResult{constants.NotUpdated, nil} + } + //update if env var exists updateResult := updateEnvVar(container, envVar, config.SHAValue) @@ -609,6 +653,32 @@ func updateEnvVar(container *v1.Container, envVar string, shaData string) consta return constants.NoEnvVarFound } +func secretProviderClassEnvReloaded(containers []v1.Container, envVar string, shaData string) bool { + for _, container := range containers { + for _, env := range container.Env { + if env.Name == envVar { + return env.Value == shaData + } + } + } + return false +} + +func populateAnnotationsFromSecretProviderClass(clients kube.Clients, config *common.Config) { + obj, err := clients.CSIClient.SecretsstoreV1().SecretProviderClasses(config.Namespace).Get(context.Background(), config.ResourceName, metav1.GetOptions{}) + annotations := make(map[string]string) + if err != nil { + if apierrors.IsNotFound(err) { + logrus.Warnf("SecretProviderClass '%s' not found in namespace '%s'", config.ResourceName, config.Namespace) + } else { + logrus.Errorf("Failed to get SecretProviderClass '%s' in namespace '%s': %v", config.ResourceName, config.Namespace, err) + } + } else if obj.Annotations != nil { + annotations = obj.Annotations + } + config.ResourceAnnotations = annotations +} + func jsonEscape(toEscape string) (string, error) { bytes, err := json.Marshal(toEscape) if err != nil { diff --git a/internal/pkg/metrics/prometheus.go b/internal/pkg/metrics/prometheus.go index 94153ea..4310393 100644 --- a/internal/pkg/metrics/prometheus.go +++ b/internal/pkg/metrics/prometheus.go @@ -1,54 +1,390 @@ package metrics import ( + "context" "net/http" + "net/url" "os" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + "k8s.io/client-go/tools/metrics" ) +// clientGoRequestMetrics implements metrics.LatencyMetric and metrics.ResultMetric +// to expose client-go's rest_client_requests_total metric +type clientGoRequestMetrics struct { + requestCounter *prometheus.CounterVec + requestLatency *prometheus.HistogramVec +} + +func (m *clientGoRequestMetrics) Increment(ctx context.Context, code string, method string, host string) { + m.requestCounter.WithLabelValues(code, method, host).Inc() +} + +func (m *clientGoRequestMetrics) Observe(ctx context.Context, verb string, u url.URL, latency time.Duration) { + m.requestLatency.WithLabelValues(verb, u.Host).Observe(latency.Seconds()) +} + +var clientGoMetrics = &clientGoRequestMetrics{ + requestCounter: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rest_client_requests_total", + Help: "Number of HTTP requests, partitioned by status code, method, and host.", + }, + []string{"code", "method", "host"}, + 
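+	// The label names mirror client-go's own instrumentation (for example rest_client_requests_total{code="200",method="GET",host="10.96.0.1:443"}), so dashboards built against the default registry keep working.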
), + requestLatency: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_request_duration_seconds", + Help: "Request latency in seconds. Broken down by verb and host.", + Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30}, + }, + []string{"verb", "host"}, + ), +} + +func init() { + // Register the metrics collectors + prometheus.MustRegister(clientGoMetrics.requestCounter) + prometheus.MustRegister(clientGoMetrics.requestLatency) + + // Register our metrics implementation with client-go + metrics.RequestResult = clientGoMetrics + metrics.RequestLatency = clientGoMetrics +} + +// Collectors holds all Prometheus metrics collectors for Reloader. type Collectors struct { Reloaded *prometheus.CounterVec ReloadedByNamespace *prometheus.CounterVec + countByNamespace bool + + ReconcileTotal *prometheus.CounterVec // Total reconcile calls by result + ReconcileDuration *prometheus.HistogramVec // Time spent in reconcile/handler + ActionTotal *prometheus.CounterVec // Total actions by workload kind and result + ActionLatency *prometheus.HistogramVec // Time from event to action applied + SkippedTotal *prometheus.CounterVec // Skipped operations by reason + QueueDepth prometheus.Gauge // Current queue depth + QueueAdds prometheus.Counter // Total items added to queue + QueueLatency *prometheus.HistogramVec // Time spent in queue + ErrorsTotal *prometheus.CounterVec // Errors by type + RetriesTotal prometheus.Counter // Total retries + EventsReceived *prometheus.CounterVec // Events received by type (add/update/delete) + EventsProcessed *prometheus.CounterVec // Events processed by type and result + WorkloadsScanned *prometheus.CounterVec // Workloads scanned by kind + WorkloadsMatched *prometheus.CounterVec // Workloads matched for reload by kind +} + +// RecordReload records a reload event with the given success status and namespace. +// Preserved for backward compatibility. +func (c *Collectors) RecordReload(success bool, namespace string) { + if c == nil { + return + } + + successLabel := "false" + if success { + successLabel = "true" + } + + c.Reloaded.With(prometheus.Labels{"success": successLabel}).Inc() + + if c.countByNamespace { + c.ReloadedByNamespace.With(prometheus.Labels{ + "success": successLabel, + "namespace": namespace, + }).Inc() + } +} + +// RecordReconcile records a reconcile/handler invocation. +func (c *Collectors) RecordReconcile(result string, duration time.Duration) { + if c == nil { + return + } + c.ReconcileTotal.With(prometheus.Labels{"result": result}).Inc() + c.ReconcileDuration.With(prometheus.Labels{"result": result}).Observe(duration.Seconds()) +} + +// RecordAction records a reload action on a workload. +func (c *Collectors) RecordAction(workloadKind string, result string, latency time.Duration) { + if c == nil { + return + } + c.ActionTotal.With(prometheus.Labels{"workload_kind": workloadKind, "result": result}).Inc() + c.ActionLatency.With(prometheus.Labels{"workload_kind": workloadKind}).Observe(latency.Seconds()) +} + +// RecordSkipped records a skipped operation with reason. +func (c *Collectors) RecordSkipped(reason string) { + if c == nil { + return + } + c.SkippedTotal.With(prometheus.Labels{"reason": reason}).Inc() +} + +// RecordQueueAdd records an item being added to the queue. +func (c *Collectors) RecordQueueAdd() { + if c == nil { + return + } + c.QueueAdds.Inc() +} + +// SetQueueDepth sets the current queue depth. 
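+// The gauge is refreshed on enqueue, dequeue, and requeue rather than being polled continuously.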
+func (c *Collectors) SetQueueDepth(depth int) { + if c == nil { + return + } + c.QueueDepth.Set(float64(depth)) +} + +// RecordQueueLatency records how long an item spent in the queue. +func (c *Collectors) RecordQueueLatency(latency time.Duration) { + if c == nil { + return + } + c.QueueLatency.With(prometheus.Labels{}).Observe(latency.Seconds()) +} + +// RecordError records an error by type. +func (c *Collectors) RecordError(errorType string) { + if c == nil { + return + } + c.ErrorsTotal.With(prometheus.Labels{"type": errorType}).Inc() +} + +// RecordRetry records a retry attempt. +func (c *Collectors) RecordRetry() { + if c == nil { + return + } + c.RetriesTotal.Inc() +} + +// RecordEventReceived records an event being received. +func (c *Collectors) RecordEventReceived(eventType string, resourceType string) { + if c == nil { + return + } + c.EventsReceived.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType}).Inc() +} + +// RecordEventProcessed records an event being processed. +func (c *Collectors) RecordEventProcessed(eventType string, resourceType string, result string) { + if c == nil { + return + } + c.EventsProcessed.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType, "result": result}).Inc() +} + +// RecordWorkloadsScanned records workloads scanned during a reconcile. +func (c *Collectors) RecordWorkloadsScanned(kind string, count int) { + if c == nil { + return + } + c.WorkloadsScanned.With(prometheus.Labels{"kind": kind}).Add(float64(count)) +} + +// RecordWorkloadsMatched records workloads matched for reload. +func (c *Collectors) RecordWorkloadsMatched(kind string, count int) { + if c == nil { + return + } + c.WorkloadsMatched.With(prometheus.Labels{"kind": kind}).Add(float64(count)) } func NewCollectors() Collectors { + // Existing metrics (preserved) reloaded := prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "reloader", Name: "reload_executed_total", Help: "Counter of reloads executed by Reloader.", }, - []string{ - "success", - }, + []string{"success"}, ) - - //set 0 as default value reloaded.With(prometheus.Labels{"success": "true"}).Add(0) reloaded.With(prometheus.Labels{"success": "false"}).Add(0) - reloaded_by_namespace := prometheus.NewCounterVec( + reloadedByNamespace := prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "reloader", Name: "reload_executed_total_by_namespace", Help: "Counter of reloads executed by Reloader by namespace.", }, - []string{ - "success", - "namespace", + []string{"success", "namespace"}, + ) + + reconcileTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "reconcile_total", + Help: "Total number of reconcile/handler invocations by result.", + }, + []string{"result"}, + ) + + reconcileDuration := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "reconcile_duration_seconds", + Help: "Time spent in reconcile/handler in seconds.", + Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10}, + }, + []string{"result"}, + ) + + actionTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "action_total", + Help: "Total number of reload actions by workload kind and result.", + }, + []string{"workload_kind", "result"}, + ) + + actionLatency := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "action_latency_seconds", + Help: "Time from event received to action applied in seconds.", + 
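+		// Upper buckets extend to 60s to leave headroom for slow API round-trips when patching large workloads.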
Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60}, + }, + []string{"workload_kind"}, + ) + + skippedTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "skipped_total", + Help: "Total number of skipped operations by reason.", + }, + []string{"reason"}, + ) + + queueDepth := prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "reloader", + Name: "workqueue_depth", + Help: "Current depth of the work queue.", }, ) + + queueAdds := prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workqueue_adds_total", + Help: "Total number of items added to the work queue.", + }, + ) + + queueLatency := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "reloader", + Name: "workqueue_latency_seconds", + Help: "Time spent in the work queue in seconds.", + Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5}, + }, + []string{}, + ) + + errorsTotal := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "errors_total", + Help: "Total number of errors by type.", + }, + []string{"type"}, + ) + + retriesTotal := prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "retries_total", + Help: "Total number of retry attempts.", + }, + ) + + eventsReceived := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "events_received_total", + Help: "Total number of events received by type and resource.", + }, + []string{"event_type", "resource_type"}, + ) + + eventsProcessed := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "events_processed_total", + Help: "Total number of events processed by type, resource, and result.", + }, + []string{"event_type", "resource_type", "result"}, + ) + + workloadsScanned := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workloads_scanned_total", + Help: "Total number of workloads scanned by kind.", + }, + []string{"kind"}, + ) + + workloadsMatched := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "reloader", + Name: "workloads_matched_total", + Help: "Total number of workloads matched for reload by kind.", + }, + []string{"kind"}, + ) + return Collectors{ Reloaded: reloaded, - ReloadedByNamespace: reloaded_by_namespace, + ReloadedByNamespace: reloadedByNamespace, + countByNamespace: os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled", + + ReconcileTotal: reconcileTotal, + ReconcileDuration: reconcileDuration, + ActionTotal: actionTotal, + ActionLatency: actionLatency, + SkippedTotal: skippedTotal, + QueueDepth: queueDepth, + QueueAdds: queueAdds, + QueueLatency: queueLatency, + ErrorsTotal: errorsTotal, + RetriesTotal: retriesTotal, + EventsReceived: eventsReceived, + EventsProcessed: eventsProcessed, + WorkloadsScanned: workloadsScanned, + WorkloadsMatched: workloadsMatched, } } func SetupPrometheusEndpoint() Collectors { collectors := NewCollectors() + prometheus.MustRegister(collectors.Reloaded) + prometheus.MustRegister(collectors.ReconcileTotal) + prometheus.MustRegister(collectors.ReconcileDuration) + prometheus.MustRegister(collectors.ActionTotal) + prometheus.MustRegister(collectors.ActionLatency) + prometheus.MustRegister(collectors.SkippedTotal) + prometheus.MustRegister(collectors.QueueDepth) + prometheus.MustRegister(collectors.QueueAdds) + prometheus.MustRegister(collectors.QueueLatency) + prometheus.MustRegister(collectors.ErrorsTotal) + 
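+	// ReloadedByNamespace is intentionally absent from this list: it is registered further down only when METRICS_COUNT_BY_NAMESPACE=enabled, so per-namespace series such as reloader_reload_executed_total_by_namespace{success="true",namespace="prod"} stay opt-in and label cardinality stays bounded.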
prometheus.MustRegister(collectors.RetriesTotal) + prometheus.MustRegister(collectors.EventsReceived) + prometheus.MustRegister(collectors.EventsProcessed) + prometheus.MustRegister(collectors.WorkloadsScanned) + prometheus.MustRegister(collectors.WorkloadsMatched) if os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled" { prometheus.MustRegister(collectors.ReloadedByNamespace) diff --git a/internal/pkg/options/flags.go b/internal/pkg/options/flags.go index 0f99be8..62f2853 100644 --- a/internal/pkg/options/flags.go +++ b/internal/pkg/options/flags.go @@ -20,6 +20,9 @@ var ( // SecretUpdateOnChangeAnnotation is an annotation to detect changes in // secrets specified by name SecretUpdateOnChangeAnnotation = "secret.reloader.stakater.com/reload" + // SecretProviderClassUpdateOnChangeAnnotation is an annotation to detect changes in + // secretproviderclasses specified by name + SecretProviderClassUpdateOnChangeAnnotation = "secretproviderclass.reloader.stakater.com/reload" // ReloaderAutoAnnotation is an annotation to detect changes in secrets/configmaps ReloaderAutoAnnotation = "reloader.stakater.com/auto" // IgnoreResourceAnnotation is an annotation to ignore changes in secrets/configmaps @@ -28,10 +31,14 @@ var ( ConfigmapReloaderAutoAnnotation = "configmap.reloader.stakater.com/auto" // SecretReloaderAutoAnnotation is an annotation to detect changes in secrets SecretReloaderAutoAnnotation = "secret.reloader.stakater.com/auto" + // SecretProviderClassReloaderAutoAnnotation is an annotation to detect changes in secretproviderclasses + SecretProviderClassReloaderAutoAnnotation = "secretproviderclass.reloader.stakater.com/auto" // ConfigmapReloaderAutoAnnotation is a comma separated list of configmaps that excludes detecting changes on cms ConfigmapExcludeReloaderAnnotation = "configmaps.exclude.reloader.stakater.com/reload" // SecretExcludeReloaderAnnotation is a comma separated list of secrets that excludes detecting changes on secrets SecretExcludeReloaderAnnotation = "secrets.exclude.reloader.stakater.com/reload" + // SecretProviderClassExcludeReloaderAnnotation is a comma separated list of secret provider classes that excludes detecting changes on secret provider class + SecretProviderClassExcludeReloaderAnnotation = "secretproviderclasses.exclude.reloader.stakater.com/reload" // AutoSearchAnnotation is an annotation to detect changes in // configmaps or triggers with the SearchMatchAnnotation AutoSearchAnnotation = "reloader.stakater.com/search" @@ -63,6 +70,8 @@ var ( EnableHA = false // Url to send a request to instead of triggering a reload WebhookUrl = "" + // EnableCSIIntegration Adds support to watch SecretProviderClassPodStatus and restart deployment based on it + EnableCSIIntegration = false // ResourcesToIgnore is a list of resources to ignore when watching for changes ResourcesToIgnore = []string{} // WorkloadTypesToIgnore is a list of workload types to ignore when watching for changes diff --git a/internal/pkg/testutil/kube.go b/internal/pkg/testutil/kube.go index 1bf441c..a778eb1 100644 --- a/internal/pkg/testutil/kube.go +++ b/internal/pkg/testutil/kube.go @@ -2,6 +2,8 @@ package testutil import ( "context" + "encoding/json" + "fmt" "math/rand" "sort" "strconv" @@ -10,10 +12,13 @@ import ( argorolloutv1alpha1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" argorollout "github.com/argoproj/argo-rollouts/pkg/client/clientset/versioned" + openshiftv1 "github.com/openshift/api/apps/v1" + appsclient "github.com/openshift/client-go/apps/clientset/versioned" 
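+	// the OpenShift API types back the new DeploymentConfig test fixtures below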
"github.com/sirupsen/logrus" "github.com/stakater/Reloader/internal/pkg/callbacks" "github.com/stakater/Reloader/internal/pkg/constants" "github.com/stakater/Reloader/internal/pkg/crypto" + "github.com/stakater/Reloader/internal/pkg/metrics" "github.com/stakater/Reloader/internal/pkg/options" "github.com/stakater/Reloader/internal/pkg/util" "github.com/stakater/Reloader/pkg/common" @@ -25,12 +30,19 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" core_v1 "k8s.io/client-go/kubernetes/typed/core/v1" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" + csiclient "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned" + csiclient_v1 "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned/typed/apis/v1" ) var ( letters = []rune("abcdefghijklmnopqrstuvwxyz") // ConfigmapResourceType is a resource type which controller watches for changes ConfigmapResourceType = "configMaps" + // SecretResourceType is a resource type which controller watches for changes + SecretResourceType = "secrets" + // SecretproviderclasspodstatusResourceType is a resource type which controller watches for changes + SecretProviderClassPodStatusResourceType = "secretproviderclasspodstatuses" ) var ( @@ -38,6 +50,11 @@ var ( Pod = "test-reloader-" + RandSeq(5) Namespace = "test-reloader-" + RandSeq(5) ConfigmapNamePrefix = "testconfigmap-reloader" + SecretNamePrefix = "testsecret-reloader" + Data = "dGVzdFNlY3JldEVuY29kaW5nRm9yUmVsb2FkZXI=" + NewData = "dGVzdE5ld1NlY3JldEVuY29kaW5nRm9yUmVsb2FkZXI=" + UpdatedData = "dGVzdFVwZGF0ZWRTZWNyZXRFbmNvZGluZ0ZvclJlbG9hZGVy" + Collectors = metrics.NewCollectors() SleepDuration = 3 * time.Second ) @@ -61,16 +78,16 @@ func DeleteNamespace(namespace string, client kubernetes.Interface) { } } -func getObjectMeta(namespace string, name string, autoReload bool, secretAutoReload bool, configmapAutoReload bool, extraAnnotations map[string]string) metav1.ObjectMeta { +func getObjectMeta(namespace string, name string, autoReload bool, secretAutoReload bool, configmapAutoReload bool, secretproviderclass bool, extraAnnotations map[string]string) metav1.ObjectMeta { return metav1.ObjectMeta{ Name: name, Namespace: namespace, Labels: map[string]string{"firstLabel": "temp"}, - Annotations: getAnnotations(name, autoReload, secretAutoReload, configmapAutoReload, extraAnnotations), + Annotations: getAnnotations(name, autoReload, secretAutoReload, configmapAutoReload, secretproviderclass, extraAnnotations), } } -func getAnnotations(name string, autoReload bool, secretAutoReload bool, configmapAutoReload bool, extraAnnotations map[string]string) map[string]string { +func getAnnotations(name string, autoReload bool, secretAutoReload bool, configmapAutoReload bool, secretproviderclass bool, extraAnnotations map[string]string) map[string]string { annotations := make(map[string]string) if autoReload { annotations[options.ReloaderAutoAnnotation] = "true" @@ -81,11 +98,16 @@ func getAnnotations(name string, autoReload bool, secretAutoReload bool, configm if configmapAutoReload { annotations[options.ConfigmapReloaderAutoAnnotation] = "true" } + if secretproviderclass { + annotations[options.SecretProviderClassReloaderAutoAnnotation] = "true" + } if len(annotations) == 0 { annotations = map[string]string{ - options.ConfigmapUpdateOnChangeAnnotation: name, - options.SecretUpdateOnChangeAnnotation: name} + options.ConfigmapUpdateOnChangeAnnotation: name, + options.SecretUpdateOnChangeAnnotation: name, + 
options.SecretProviderClassUpdateOnChangeAnnotation: name, + } } for k, v := range extraAnnotations { annotations[k] = v @@ -93,6 +115,25 @@ func getAnnotations(name string, autoReload bool, secretAutoReload bool, configm return annotations } +func getEnvVarSources(name string) []v1.EnvFromSource { + return []v1.EnvFromSource{ + { + ConfigMapRef: &v1.ConfigMapEnvSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: name, + }, + }, + }, + { + SecretRef: &v1.SecretEnvSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: name, + }, + }, + }, + } +} + func getVolumes(name string) []v1.Volume { return []v1.Volume{ { @@ -145,6 +186,15 @@ func getVolumes(name string) []v1.Volume { }, }, }, + { + Name: "secretproviderclass", + VolumeSource: v1.VolumeSource{ + CSI: &v1.CSIVolumeSource{ + Driver: "secrets-store.csi.k8s.io", + VolumeAttributes: map[string]string{"secretProviderClass": name}, + }, + }, + }, } } @@ -158,6 +208,10 @@ func getVolumeMounts() []v1.VolumeMount { MountPath: "etc/sec", Name: "secret", }, + { + MountPath: "etc/spc", + Name: "secretproviderclass", + }, { MountPath: "etc/projectedconfig", Name: "projectedconfigmap", @@ -213,6 +267,23 @@ func getPodTemplateSpecWithEnvVars(name string) v1.PodTemplateSpec { } } +func getPodTemplateSpecWithEnvVarSources(name string) v1.PodTemplateSpec { + return v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"secondLabel": "temp"}, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "tutum/hello-world", + Name: name, + EnvFrom: getEnvVarSources(name), + }, + }, + }, + } +} + func getPodTemplateSpecWithVolumes(name string) v1.PodTemplateSpec { return v1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -237,11 +308,70 @@ func getPodTemplateSpecWithVolumes(name string) v1.PodTemplateSpec { } } +func getPodTemplateSpecWithInitContainer(name string) v1.PodTemplateSpec { + return v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"secondLabel": "temp"}, + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Image: "busybox", + Name: "busyBox", + VolumeMounts: getVolumeMounts(), + }, + }, + Containers: []v1.Container{ + { + Image: "tutum/hello-world", + Name: name, + Env: []v1.EnvVar{ + { + Name: "BUCKET_NAME", + Value: "test", + }, + }, + }, + }, + Volumes: getVolumes(name), + }, + } +} + +func getPodTemplateSpecWithInitContainerAndEnv(name string) v1.PodTemplateSpec { + return v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"secondLabel": "temp"}, + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Image: "busybox", + Name: "busyBox", + EnvFrom: getEnvVarSources(name), + }, + }, + Containers: []v1.Container{ + { + Image: "tutum/hello-world", + Name: name, + Env: []v1.EnvVar{ + { + Name: "BUCKET_NAME", + Value: "test", + }, + }, + }, + }, + }, + } +} + // GetDeployment provides deployment for testing func GetDeployment(namespace string, deploymentName string) *appsv1.Deployment { replicaset := int32(1) return &appsv1.Deployment{ - ObjectMeta: getObjectMeta(namespace, deploymentName, false, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, deploymentName, false, false, false, false, map[string]string{}), Spec: appsv1.DeploymentSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -255,10 +385,62 @@ func GetDeployment(namespace string, deploymentName string) *appsv1.Deployment { } } +// GetDeploymentConfig provides deployment for 
testing +func GetDeploymentConfig(namespace string, deploymentConfigName string) *openshiftv1.DeploymentConfig { + replicaset := int32(1) + podTemplateSpecWithVolume := getPodTemplateSpecWithVolumes(deploymentConfigName) + return &openshiftv1.DeploymentConfig{ + ObjectMeta: getObjectMeta(namespace, deploymentConfigName, false, false, false, false, map[string]string{}), + Spec: openshiftv1.DeploymentConfigSpec{ + Replicas: replicaset, + Strategy: openshiftv1.DeploymentStrategy{ + Type: openshiftv1.DeploymentStrategyTypeRolling, + }, + Template: &podTemplateSpecWithVolume, + }, + } +} + +// GetDeploymentWithInitContainer provides deployment with init container and volumeMounts +func GetDeploymentWithInitContainer(namespace string, deploymentName string) *appsv1.Deployment { + replicaset := int32(1) + return &appsv1.Deployment{ + ObjectMeta: getObjectMeta(namespace, deploymentName, false, false, false, false, map[string]string{}), + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithInitContainer(deploymentName), + }, + } +} + +// GetDeploymentWithInitContainerAndEnv provides deployment with init container and EnvSource +func GetDeploymentWithInitContainerAndEnv(namespace string, deploymentName string) *appsv1.Deployment { + replicaset := int32(1) + return &appsv1.Deployment{ + ObjectMeta: getObjectMeta(namespace, deploymentName, true, false, false, false, map[string]string{}), + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithInitContainerAndEnv(deploymentName), + }, + } +} + func GetDeploymentWithEnvVars(namespace string, deploymentName string) *appsv1.Deployment { replicaset := int32(1) return &appsv1.Deployment{ - ObjectMeta: getObjectMeta(namespace, deploymentName, true, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, deploymentName, true, false, false, false, map[string]string{}), Spec: appsv1.DeploymentSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -272,10 +454,125 @@ func GetDeploymentWithEnvVars(namespace string, deploymentName string) *appsv1.D } } +func GetDeploymentConfigWithEnvVars(namespace string, deploymentConfigName string) *openshiftv1.DeploymentConfig { + replicaset := int32(1) + podTemplateSpecWithEnvVars := getPodTemplateSpecWithEnvVars(deploymentConfigName) + return &openshiftv1.DeploymentConfig{ + ObjectMeta: getObjectMeta(namespace, deploymentConfigName, false, false, false, false, map[string]string{}), + Spec: openshiftv1.DeploymentConfigSpec{ + Replicas: replicaset, + Strategy: openshiftv1.DeploymentStrategy{ + Type: openshiftv1.DeploymentStrategyTypeRolling, + }, + Template: &podTemplateSpecWithEnvVars, + }, + } +} + +func GetDeploymentWithEnvVarSources(namespace string, deploymentName string) *appsv1.Deployment { + replicaset := int32(1) + return &appsv1.Deployment{ + ObjectMeta: getObjectMeta(namespace, deploymentName, true, false, false, false, map[string]string{}), + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: 
appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithEnvVarSources(deploymentName), + }, + } +} + +func GetDeploymentWithPodAnnotations(namespace string, deploymentName string, both bool) *appsv1.Deployment { + replicaset := int32(1) + deployment := &appsv1.Deployment{ + ObjectMeta: getObjectMeta(namespace, deploymentName, false, false, false, false, map[string]string{}), + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithEnvVarSources(deploymentName), + }, + } + if !both { + deployment.Annotations = nil + } + deployment.Spec.Template.Annotations = getAnnotations(deploymentName, true, false, false, false, map[string]string{}) + return deployment +} + +func GetDeploymentWithTypedAutoAnnotation(namespace string, deploymentName string, resourceType string) *appsv1.Deployment { + replicaset := int32(1) + var objectMeta metav1.ObjectMeta + switch resourceType { + case SecretResourceType: + objectMeta = getObjectMeta(namespace, deploymentName, false, true, false, false, map[string]string{}) + case ConfigmapResourceType: + objectMeta = getObjectMeta(namespace, deploymentName, false, false, true, false, map[string]string{}) + case SecretProviderClassPodStatusResourceType: + objectMeta = getObjectMeta(namespace, deploymentName, false, false, false, true, map[string]string{}) + } + + return &appsv1.Deployment{ + ObjectMeta: objectMeta, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithVolumes(deploymentName), + }, + } +} + +func GetDeploymentWithExcludeAnnotation(namespace string, deploymentName string, resourceType string) *appsv1.Deployment { + replicaset := int32(1) + + annotation := map[string]string{} + + switch resourceType { + case SecretResourceType: + annotation[options.SecretExcludeReloaderAnnotation] = deploymentName + case ConfigmapResourceType: + annotation[options.ConfigmapExcludeReloaderAnnotation] = deploymentName + case SecretProviderClassPodStatusResourceType: + annotation[options.SecretProviderClassExcludeReloaderAnnotation] = deploymentName + } + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: deploymentName, + Namespace: namespace, + Labels: map[string]string{"firstLabel": "temp"}, + Annotations: annotation, + }, + Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"secondLabel": "temp"}, + }, + Replicas: &replicaset, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: getPodTemplateSpecWithVolumes(deploymentName), + }, + } +} + // GetDaemonSet provides daemonset for testing func GetDaemonSet(namespace string, daemonsetName string) *appsv1.DaemonSet { return &appsv1.DaemonSet{ - ObjectMeta: getObjectMeta(namespace, daemonsetName, false, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, daemonsetName, false, false, false, false, map[string]string{}), Spec: appsv1.DaemonSetSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -290,7 +587,7 @@ func GetDaemonSet(namespace 
string, daemonsetName string) *appsv1.DaemonSet { func GetDaemonSetWithEnvVars(namespace string, daemonSetName string) *appsv1.DaemonSet { return &appsv1.DaemonSet{ - ObjectMeta: getObjectMeta(namespace, daemonSetName, true, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, daemonSetName, true, false, false, false, map[string]string{}), Spec: appsv1.DaemonSetSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -306,7 +603,7 @@ func GetDaemonSetWithEnvVars(namespace string, daemonSetName string) *appsv1.Dae // GetStatefulSet provides statefulset for testing func GetStatefulSet(namespace string, statefulsetName string) *appsv1.StatefulSet { return &appsv1.StatefulSet{ - ObjectMeta: getObjectMeta(namespace, statefulsetName, false, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, statefulsetName, false, false, false, false, map[string]string{}), Spec: appsv1.StatefulSetSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -322,7 +619,7 @@ func GetStatefulSet(namespace string, statefulsetName string) *appsv1.StatefulSe // GetStatefulSet provides statefulset for testing func GetStatefulSetWithEnvVar(namespace string, statefulsetName string) *appsv1.StatefulSet { return &appsv1.StatefulSet{ - ObjectMeta: getObjectMeta(namespace, statefulsetName, true, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, statefulsetName, true, false, false, false, map[string]string{}), Spec: appsv1.StatefulSetSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -347,6 +644,42 @@ func GetConfigmap(namespace string, configmapName string, testData string) *v1.C } } +func GetSecretProviderClass(namespace string, secretProviderClassName string, data string) *csiv1.SecretProviderClass { + return &csiv1.SecretProviderClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretProviderClassName, + Namespace: namespace, + }, + Spec: csiv1.SecretProviderClassSpec{ + Provider: "Test", + Parameters: map[string]string{ + "parameter1": data, + }, + }, + } +} + +func GetSecretProviderClassPodStatus(namespace string, secretProviderClassPodStatusName string, data string) *csiv1.SecretProviderClassPodStatus { + return &csiv1.SecretProviderClassPodStatus{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretProviderClassPodStatusName, + Namespace: namespace, + }, + Status: csiv1.SecretProviderClassPodStatusStatus{ + PodName: "test123", + SecretProviderClassName: secretProviderClassPodStatusName, + TargetPath: "/var/lib/kubelet/d8771ddf-935a-4199-a20b-f35f71c1d9e7/volumes/kubernetes.io~csi/secrets-store-inline/mount", + Mounted: true, + Objects: []csiv1.SecretProviderClassObject{ + { + ID: "parameter1", + Version: data, + }, + }, + }, + } +} + // GetConfigmapWithUpdatedLabel provides configmap for testing func GetConfigmapWithUpdatedLabel(namespace string, configmapName string, testLabel string, testData string) *v1.ConfigMap { return &v1.ConfigMap{ @@ -359,9 +692,21 @@ func GetConfigmapWithUpdatedLabel(namespace string, configmapName string, testLa } } +// GetSecret provides secret for testing +func GetSecret(namespace string, secretName string, data string) *v1.Secret { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: namespace, + Labels: map[string]string{"firstLabel": "temp"}, + }, + Data: map[string][]byte{"test.url": []byte(data)}, + } +} + func GetCronJob(namespace string, cronJobName string) 
*batchv1.CronJob { return &batchv1.CronJob{ - ObjectMeta: getObjectMeta(namespace, cronJobName, false, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, cronJobName, false, false, false, false, map[string]string{}), Spec: batchv1.CronJobSpec{ Schedule: "*/5 * * * *", // Run every 5 minutes JobTemplate: batchv1.JobTemplateSpec{ @@ -378,7 +723,7 @@ func GetCronJob(namespace string, cronJobName string) *batchv1.CronJob { func GetJob(namespace string, jobName string) *batchv1.Job { return &batchv1.Job{ - ObjectMeta: getObjectMeta(namespace, jobName, false, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, jobName, false, false, false, false, map[string]string{}), Spec: batchv1.JobSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -390,7 +735,7 @@ func GetJob(namespace string, jobName string) *batchv1.Job { func GetCronJobWithEnvVar(namespace string, cronJobName string) *batchv1.CronJob { return &batchv1.CronJob{ - ObjectMeta: getObjectMeta(namespace, cronJobName, true, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, cronJobName, true, false, false, false, map[string]string{}), Spec: batchv1.CronJobSpec{ Schedule: "*/5 * * * *", // Run every 5 minutes JobTemplate: batchv1.JobTemplateSpec{ @@ -407,7 +752,7 @@ func GetCronJobWithEnvVar(namespace string, cronJobName string) *batchv1.CronJob func GetJobWithEnvVar(namespace string, jobName string) *batchv1.Job { return &batchv1.Job{ - ObjectMeta: getObjectMeta(namespace, jobName, true, false, false, map[string]string{}), + ObjectMeta: getObjectMeta(namespace, jobName, true, false, false, false, map[string]string{}), Spec: batchv1.JobSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, @@ -417,6 +762,18 @@ func GetJobWithEnvVar(namespace string, jobName string) *batchv1.Job { } } +// GetSecretWithUpdatedLabel provides secret for testing +func GetSecretWithUpdatedLabel(namespace string, secretName string, label string, data string) *v1.Secret { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: namespace, + Labels: map[string]string{"firstLabel": label}, + }, + Data: map[string][]byte{"test.url": []byte(data)}, + } +} + // GetResourceSHAFromEnvVar returns the SHA value of given environment variable func GetResourceSHAFromEnvVar(containers []v1.Container, envVar string) string { for i := range containers { @@ -430,14 +787,48 @@ func GetResourceSHAFromEnvVar(containers []v1.Container, envVar string) string { return "" } -// ConvertResourceToSHA generates SHA from configmap data +// GetResourceSHAFromAnnotation returns the SHA value recorded in the last-reloaded-from pod annotation +func GetResourceSHAFromAnnotation(podAnnotations map[string]string) string { + lastReloadedResourceName := fmt.Sprintf("%s/%s", + constants.ReloaderAnnotationPrefix, + constants.LastReloadedFromAnnotation, + ) + + annotationJson, ok := podAnnotations[lastReloadedResourceName] + if !ok { + return "" + } + + var last common.ReloadSource + bytes := []byte(annotationJson) + err := json.Unmarshal(bytes, &last) + if err != nil { + return "" + } + + return last.Hash +} + +// ConvertResourceToSHA generates SHA from secret, configmap or secretproviderclasspodstatus data func ConvertResourceToSHA(resourceType string, namespace string, resourceName string, data string) string { values := []string{} - if resourceType == ConfigmapResourceType { + switch resourceType { + case SecretResourceType: + secret :=
GetSecret(namespace, resourceName, data) + for k, v := range secret.Data { + values = append(values, k+"="+string(v)) + } + case ConfigmapResourceType: configmap := GetConfigmap(namespace, resourceName, data) for k, v := range configmap.Data { values = append(values, k+"="+v) } + case SecretProviderClassPodStatusResourceType: + secretproviderclasspodstatus := GetSecretProviderClassPodStatus(namespace, resourceName, data) + for _, v := range secretproviderclasspodstatus.Status.Objects { + values = append(values, v.ID+"="+v.Version) + } + values = append(values, "SecretProviderClassName="+secretproviderclasspodstatus.Status.SecretProviderClassName) } sort.Strings(values) return crypto.GenerateSHA(strings.Join(values, ";")) @@ -452,6 +843,34 @@ func CreateConfigMap(client kubernetes.Interface, namespace string, configmapNam return configmapClient, err } +// CreateSecretProviderClass creates a SecretProviderClass in given namespace and returns the SecretProviderClassInterface +func CreateSecretProviderClass(client csiclient.Interface, namespace string, secretProviderClassName string, data string) (csiclient_v1.SecretProviderClassInterface, error) { + logrus.Infof("Creating SecretProviderClass") + secretProviderClassClient := client.SecretsstoreV1().SecretProviderClasses(namespace) + _, err := secretProviderClassClient.Create(context.TODO(), GetSecretProviderClass(namespace, secretProviderClassName, data), metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return secretProviderClassClient, err +} + +// CreateSecretProviderClassPodStatus creates a SecretProviderClassPodStatus in given namespace and returns the SecretProviderClassPodStatusInterface +func CreateSecretProviderClassPodStatus(client csiclient.Interface, namespace string, secretProviderClassPodStatusName string, data string) (csiclient_v1.SecretProviderClassPodStatusInterface, error) { + logrus.Infof("Creating SecretProviderClassPodStatus") + secretProviderClassPodStatusClient := client.SecretsstoreV1().SecretProviderClassPodStatuses(namespace) + secretProviderClassPodStatus := GetSecretProviderClassPodStatus(namespace, secretProviderClassPodStatusName, data) + _, err := secretProviderClassPodStatusClient.Create(context.TODO(), secretProviderClassPodStatus, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return secretProviderClassPodStatusClient, err +} + +// CreateSecret creates a secret in given namespace and returns the SecretInterface +func CreateSecret(client kubernetes.Interface, namespace string, secretName string, data string) (core_v1.SecretInterface, error) { + logrus.Infof("Creating secret") + secretClient := client.CoreV1().Secrets(namespace) + _, err := secretClient.Create(context.TODO(), GetSecret(namespace, secretName, data), metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return secretClient, err +} + // CreateDeployment creates a deployment in given namespace and returns the Deployment func CreateDeployment(client kubernetes.Interface, deploymentName string, namespace string, volumeMount bool) (*appsv1.Deployment, error) { logrus.Infof("Creating Deployment") @@ -467,6 +886,108 @@ func CreateDeployment(client kubernetes.Interface, deploymentName string, namesp return deployment, err } +// CreateDeploymentWithAnnotations creates a deployment with the given additional annotations in given namespace and returns the Deployment +func CreateDeploymentWithAnnotations(client kubernetes.Interface, deploymentName string, namespace string, additionalAnnotations map[string]string, volumeMount bool) (*appsv1.Deployment, error) { + logrus.Infof("Creating
Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + var deploymentObj *appsv1.Deployment + if volumeMount { + deploymentObj = GetDeployment(namespace, deploymentName) + } else { + deploymentObj = GetDeploymentWithEnvVars(namespace, deploymentName) + } + + for annotationKey, annotationValue := range additionalAnnotations { + deploymentObj.Annotations[annotationKey] = annotationValue + } + + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err +} + +// CreateDeploymentConfig creates a deploymentConfig in given namespace and returns the DeploymentConfig +func CreateDeploymentConfig(client appsclient.Interface, deploymentName string, namespace string, volumeMount bool) (*openshiftv1.DeploymentConfig, error) { + logrus.Infof("Creating DeploymentConfig") + deploymentConfigsClient := client.AppsV1().DeploymentConfigs(namespace) + var deploymentConfigObj *openshiftv1.DeploymentConfig + if volumeMount { + deploymentConfigObj = GetDeploymentConfig(namespace, deploymentName) + } else { + deploymentConfigObj = GetDeploymentConfigWithEnvVars(namespace, deploymentName) + } + deploymentConfig, err := deploymentConfigsClient.Create(context.TODO(), deploymentConfigObj, metav1.CreateOptions{}) + time.Sleep(5 * time.Second) + return deploymentConfig, err +} + +// CreateDeploymentWithInitContainer creates a deployment in given namespace with init container and returns the Deployment +func CreateDeploymentWithInitContainer(client kubernetes.Interface, deploymentName string, namespace string, volumeMount bool) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + var deploymentObj *appsv1.Deployment + if volumeMount { + deploymentObj = GetDeploymentWithInitContainer(namespace, deploymentName) + } else { + deploymentObj = GetDeploymentWithInitContainerAndEnv(namespace, deploymentName) + } + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err +} + +// CreateDeploymentWithEnvVarSource creates a deployment in given namespace and returns the Deployment +func CreateDeploymentWithEnvVarSource(client kubernetes.Interface, deploymentName string, namespace string) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + deploymentObj := GetDeploymentWithEnvVarSources(namespace, deploymentName) + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err + +} + +// CreateDeploymentWithPodAnnotations creates a deployment in given namespace and returns the Deployment +func CreateDeploymentWithPodAnnotations(client kubernetes.Interface, deploymentName string, namespace string, both bool) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + deploymentObj := GetDeploymentWithPodAnnotations(namespace, deploymentName, both) + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err +} + +// CreateDeploymentWithEnvVarSourceAndAnnotations returns a deployment in given +// namespace with given annotations. 
+func CreateDeploymentWithEnvVarSourceAndAnnotations(client kubernetes.Interface, deploymentName string, namespace string, annotations map[string]string) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + deploymentObj := GetDeploymentWithEnvVarSources(namespace, deploymentName) + deploymentObj.Annotations = annotations + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err +} + +// CreateDeploymentWithTypedAutoAnnotation creates a deployment in given namespace and returns the Deployment with typed auto annotation +func CreateDeploymentWithTypedAutoAnnotation(client kubernetes.Interface, deploymentName string, namespace string, resourceType string) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + deploymentObj := GetDeploymentWithTypedAutoAnnotation(namespace, deploymentName, resourceType) + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + time.Sleep(3 * time.Second) + return deployment, err +} + +// CreateDeploymentWithExcludeAnnotation creates a deployment in given namespace and returns the Deployment with exclude annotation +func CreateDeploymentWithExcludeAnnotation(client kubernetes.Interface, deploymentName string, namespace string, resourceType string) (*appsv1.Deployment, error) { + logrus.Infof("Creating Deployment") + deploymentClient := client.AppsV1().Deployments(namespace) + deploymentObj := GetDeploymentWithExcludeAnnotation(namespace, deploymentName, resourceType) + deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{}) + return deployment, err +} + // CreateDaemonSet creates a deployment in given namespace and returns the DaemonSet func CreateDaemonSet(client kubernetes.Interface, daemonsetName string, namespace string, volumeMount bool) (*appsv1.DaemonSet, error) { logrus.Infof("Creating DaemonSet") @@ -535,6 +1056,14 @@ func DeleteDeployment(client kubernetes.Interface, namespace string, deploymentN return deploymentError } +// DeleteDeploymentConfig deletes a deploymentConfig in given namespace and returns the error if any +func DeleteDeploymentConfig(client appsclient.Interface, namespace string, deploymentConfigName string) error { + logrus.Infof("Deleting DeploymentConfig") + deploymentConfigError := client.AppsV1().DeploymentConfigs(namespace).Delete(context.TODO(), deploymentConfigName, metav1.DeleteOptions{}) + time.Sleep(3 * time.Second) + return deploymentConfigError +} + // DeleteDaemonSet creates a daemonset in given namespace and returns the error if any func DeleteDaemonSet(client kubernetes.Interface, namespace string, daemonsetName string) error { logrus.Infof("Deleting DaemonSet %s", daemonsetName) @@ -581,6 +1110,41 @@ func UpdateConfigMap(configmapClient core_v1.ConfigMapInterface, namespace strin return updateErr } +// UpdateSecret updates a secret in given namespace and returns the error if any +func UpdateSecret(secretClient core_v1.SecretInterface, namespace string, secretName string, label string, data string) error { + logrus.Infof("Updating secret %q.\n", secretName) + var secret *v1.Secret + if label != "" { + secret = GetSecretWithUpdatedLabel(namespace, secretName, label, data) + } else { + secret = GetSecret(namespace, secretName, data) + } + _, updateErr :=
secretClient.Update(context.TODO(), secret, metav1.UpdateOptions{}) + time.Sleep(3 * time.Second) + return updateErr +} + +// UpdateSecretProviderClassPodStatus updates a secretproviderclasspodstatus in given namespace and returns the error if any +func UpdateSecretProviderClassPodStatus(spcpsClient csiclient_v1.SecretProviderClassPodStatusInterface, namespace string, spcpsName string, label string, data string) error { + logrus.Infof("Updating secretproviderclasspodstatus %q.\n", spcpsName) + updatedStatus := GetSecretProviderClassPodStatus(namespace, spcpsName, data).Status + secretproviderclasspodstatus, err := spcpsClient.Get(context.TODO(), spcpsName, metav1.GetOptions{}) + if err != nil { + return err + } + secretproviderclasspodstatus.Status = updatedStatus + if label != "" { + if secretproviderclasspodstatus.Labels == nil { + secretproviderclasspodstatus.Labels = make(map[string]string) + } + // assign through the object so a newly created map is not dropped + secretproviderclasspodstatus.Labels["firstLabel"] = label + } + _, updateErr := spcpsClient.Update(context.TODO(), secretproviderclasspodstatus, metav1.UpdateOptions{}) + time.Sleep(3 * time.Second) + return updateErr +} + // DeleteConfigMap deletes a configmap in given namespace and returns the error if any func DeleteConfigMap(client kubernetes.Interface, namespace string, configmapName string) error { logrus.Infof("Deleting configmap %q.\n", configmapName) @@ -589,6 +1153,30 @@ func DeleteConfigMap(client kubernetes.Interface, namespace string, configmapNam return err } +// DeleteSecret deletes a secret in given namespace and returns the error if any +func DeleteSecret(client kubernetes.Interface, namespace string, secretName string) error { + logrus.Infof("Deleting secret %q.\n", secretName) + err := client.CoreV1().Secrets(namespace).Delete(context.TODO(), secretName, metav1.DeleteOptions{}) + time.Sleep(3 * time.Second) + return err +} + +// DeleteSecretProviderClass deletes a secretproviderclass in given namespace and returns the error if any +func DeleteSecretProviderClass(client csiclient.Interface, namespace string, secretProviderClassName string) error { + logrus.Infof("Deleting secretproviderclass %q.\n", secretProviderClassName) + err := client.SecretsstoreV1().SecretProviderClasses(namespace).Delete(context.TODO(), secretProviderClassName, metav1.DeleteOptions{}) + time.Sleep(3 * time.Second) + return err +} + +// DeleteSecretProviderClassPodStatus deletes a secretproviderclasspodstatus in given namespace and returns the error if any +func DeleteSecretProviderClassPodStatus(client csiclient.Interface, namespace string, secretProviderClassPodStatusName string) error { + logrus.Infof("Deleting secretproviderclasspodstatus %q.\n", secretProviderClassPodStatusName) + err := client.SecretsstoreV1().SecretProviderClassPodStatuses(namespace).Delete(context.TODO(), secretProviderClassPodStatusName, metav1.DeleteOptions{}) + time.Sleep(3 * time.Second) + return err +} + // RandSeq generates a random sequence func RandSeq(n int) string { b := make([]rune, n) @@ -644,15 +1232,113 @@ func VerifyResourceEnvVarUpdate(clients kube.Clients, config common.Config, envV return false } +// VerifyResourceEnvVarRemoved verifies whether the rolling upgrade happened or not and all Envvars STAKATER_name_CONFIGMAP/SECRET are removed +func VerifyResourceEnvVarRemoved(clients kube.Clients, config common.Config, envVarPostfix string, upgradeFuncs callbacks.RollingUpgradeFuncs) bool { + items := upgradeFuncs.ItemsFunc(clients, config.Namespace) + for _, i := range items { + containers := upgradeFuncs.ContainersFunc(i) + accessor, err :=
meta.Accessor(i) + if err != nil { + return false + } + + annotations := accessor.GetAnnotations() + // match statefulsets with the correct annotation + + annotationValue := annotations[config.Annotation] + searchAnnotationValue := annotations[options.AutoSearchAnnotation] + reloaderEnabledValue := annotations[options.ReloaderAutoAnnotation] + typedAutoAnnotationEnabledValue := annotations[config.TypedAutoAnnotation] + reloaderEnabled, err := strconv.ParseBool(reloaderEnabledValue) + typedAutoAnnotationEnabled, errTyped := strconv.ParseBool(typedAutoAnnotationEnabledValue) + + matches := false + if err == nil && reloaderEnabled || errTyped == nil && typedAutoAnnotationEnabled { + matches = true + } else if annotationValue != "" { + values := strings.Split(annotationValue, ",") + for _, value := range values { + value = strings.Trim(value, " ") + if value == config.ResourceName { + matches = true + break + } + } + } else if searchAnnotationValue == "true" { + if config.ResourceAnnotations[options.SearchMatchAnnotation] == "true" { + matches = true + } + } + + if matches { + envName := constants.EnvVarPrefix + util.ConvertToEnvVarName(config.ResourceName) + "_" + envVarPostfix + value := GetResourceSHAFromEnvVar(containers, envName) + if value == "" { + return true + } + } + } + return false +} + +// VerifyResourceAnnotationUpdate verifies whether the rolling upgrade happened or not +func VerifyResourceAnnotationUpdate(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs) bool { + items := upgradeFuncs.ItemsFunc(clients, config.Namespace) + for _, i := range items { + podAnnotations := upgradeFuncs.PodAnnotationsFunc(i) + accessor, err := meta.Accessor(i) + if err != nil { + return false + } + annotations := accessor.GetAnnotations() + // match statefulsets with the correct annotation + annotationValue := annotations[config.Annotation] + searchAnnotationValue := annotations[options.AutoSearchAnnotation] + reloaderEnabledValue := annotations[options.ReloaderAutoAnnotation] + typedAutoAnnotationEnabledValue := annotations[config.TypedAutoAnnotation] + reloaderEnabled, _ := strconv.ParseBool(reloaderEnabledValue) + typedAutoAnnotationEnabled, _ := strconv.ParseBool(typedAutoAnnotationEnabledValue) + matches := false + if reloaderEnabled || typedAutoAnnotationEnabled || reloaderEnabledValue == "" && typedAutoAnnotationEnabledValue == "" && options.AutoReloadAll { + matches = true + } else if annotationValue != "" { + values := strings.Split(annotationValue, ",") + for _, value := range values { + value = strings.Trim(value, " ") + if value == config.ResourceName { + matches = true + break + } + } + } else if searchAnnotationValue == "true" { + if config.ResourceAnnotations[options.SearchMatchAnnotation] == "true" { + matches = true + } + } + + if matches { + updated := GetResourceSHAFromAnnotation(podAnnotations) + if updated == config.SHAValue { + return true + } + } + } + return false +} + func GetSHAfromEmptyData() string { - return crypto.GenerateSHA("") + // Use a special marker that represents "deleted" or "empty" state + // This ensures we have a distinct, deterministic hash for the delete strategy + // Note: We could use GenerateSHA("") which now returns a hash, but using a marker + // makes the intent clearer and avoids potential confusion with actual empty data + return crypto.GenerateSHA("__RELOADER_EMPTY_DELETE_MARKER__") } // GetRollout provides rollout for testing func GetRollout(namespace string, rolloutName string, annotations map[string]string) 
*argorolloutv1alpha1.Rollout { replicaset := int32(1) return &argorolloutv1alpha1.Rollout{ - ObjectMeta: getObjectMeta(namespace, rolloutName, false, false, false, annotations), + ObjectMeta: getObjectMeta(namespace, rolloutName, false, false, false, false, annotations), Spec: argorolloutv1alpha1.RolloutSpec{ Selector: &metav1.LabelSelector{ MatchLabels: map[string]string{"secondLabel": "temp"}, diff --git a/internal/pkg/util/util.go b/internal/pkg/util/util.go index ec86d1c..476cdb9 100644 --- a/internal/pkg/util/util.go +++ b/internal/pkg/util/util.go @@ -13,6 +13,7 @@ import ( "github.com/stakater/Reloader/internal/pkg/crypto" "github.com/stakater/Reloader/internal/pkg/options" v1 "k8s.io/api/core/v1" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" ) // ConvertToEnvVarName converts the given text into a usable env var @@ -57,6 +58,16 @@ func GetSHAfromSecret(data map[string][]byte) string { return crypto.GenerateSHA(strings.Join(values, ";")) } +func GetSHAfromSecretProviderClassPodStatus(data csiv1.SecretProviderClassPodStatusStatus) string { + values := []string{} + for _, v := range data.Objects { + values = append(values, v.ID+"="+v.Version) + } + values = append(values, "SecretProviderClassName="+data.SecretProviderClassName) + sort.Strings(values) + return crypto.GenerateSHA(strings.Join(values, ";")) +} + type List []string func (l *List) Contains(s string) bool { @@ -95,6 +106,7 @@ func ConfigureReloaderFlags(cmd *cobra.Command) { cmd.PersistentFlags().BoolVar(&options.SyncAfterRestart, "sync-after-restart", false, "Sync add events after reloader restarts") cmd.PersistentFlags().BoolVar(&options.EnablePProf, "enable-pprof", false, "Enable pprof for profiling") cmd.PersistentFlags().StringVar(&options.PProfAddr, "pprof-addr", ":6060", "Address to start pprof server on. Default is :6060") + cmd.PersistentFlags().BoolVar(&options.EnableCSIIntegration, "enable-csi-integration", false, "Enables CSI integration. 
Default is :false") } func GetIgnoredResourcesList() (List, error) { diff --git a/pkg/common/common.go b/pkg/common/common.go index 84d9827..7c9d61e 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -32,6 +32,8 @@ type ReloaderOptions struct { ConfigmapUpdateOnChangeAnnotation string `json:"configmapUpdateOnChangeAnnotation"` // SecretUpdateOnChangeAnnotation is the annotation key used to detect changes in Secrets specified by name SecretUpdateOnChangeAnnotation string `json:"secretUpdateOnChangeAnnotation"` + // SecretProviderClassUpdateOnChangeAnnotation is the annotation key used to detect changes in SecretProviderClasses specified by name + SecretProviderClassUpdateOnChangeAnnotation string `json:"secretProviderClassUpdateOnChangeAnnotation"` // ReloaderAutoAnnotation is the annotation key used to detect changes in any referenced ConfigMaps or Secrets ReloaderAutoAnnotation string `json:"reloaderAutoAnnotation"` // IgnoreResourceAnnotation is the annotation key used to ignore resources from being watched @@ -40,10 +42,14 @@ type ReloaderOptions struct { ConfigmapReloaderAutoAnnotation string `json:"configmapReloaderAutoAnnotation"` // SecretReloaderAutoAnnotation is the annotation key used to detect changes in Secrets only SecretReloaderAutoAnnotation string `json:"secretReloaderAutoAnnotation"` + // SecretProviderClassReloaderAutoAnnotation is the annotation key used to detect changes in SecretProviderClasses only + SecretProviderClassReloaderAutoAnnotation string `json:"secretProviderClassReloaderAutoAnnotation"` // ConfigmapExcludeReloaderAnnotation is the annotation key containing comma-separated list of ConfigMaps to exclude from watching ConfigmapExcludeReloaderAnnotation string `json:"configmapExcludeReloaderAnnotation"` // SecretExcludeReloaderAnnotation is the annotation key containing comma-separated list of Secrets to exclude from watching SecretExcludeReloaderAnnotation string `json:"secretExcludeReloaderAnnotation"` + // SecretProviderClassExcludeReloaderAnnotation is the annotation key containing comma-separated list of SecretProviderClasses to exclude from watching + SecretProviderClassExcludeReloaderAnnotation string `json:"secretProviderClassExcludeReloaderAnnotation"` // AutoSearchAnnotation is the annotation key used to detect changes in ConfigMaps/Secrets tagged with SearchMatchAnnotation AutoSearchAnnotation string `json:"autoSearchAnnotation"` // SearchMatchAnnotation is the annotation key used to tag ConfigMaps/Secrets to be found by AutoSearchAnnotation @@ -71,6 +77,8 @@ type ReloaderOptions struct { SyncAfterRestart bool `json:"syncAfterRestart"` // EnableHA indicates whether High Availability mode is enabled with leader election EnableHA bool `json:"enableHA"` + // EnableCSIIntegration indicates whether CSI integration is enabled to watch SecretProviderClassPodStatus + EnableCSIIntegration bool `json:"enableCSIIntegration"` // WebhookUrl is the URL to send webhook notifications to instead of performing reloads WebhookUrl string `json:"webhookUrl"` // ResourcesToIgnore is a list of resource types to ignore (e.g., "configmaps" or "secrets") @@ -224,6 +232,7 @@ func ShouldReload(config Config, resourceType string, annotations Map, podAnnota typedAutoAnnotationEnabledValue, foundTypedAuto := annotations[config.TypedAutoAnnotation] excludeConfigmapAnnotationValue, foundExcludeConfigmap := annotations[options.ConfigmapExcludeReloaderAnnotation] excludeSecretAnnotationValue, foundExcludeSecret := annotations[options.SecretExcludeReloaderAnnotation] + 
excludeSecretProviderClassProviderAnnotationValue, foundExcludeSecretProviderClass := annotations[options.SecretProviderClassExcludeReloaderAnnotation] if !found && !foundAuto && !foundTypedAuto && !foundSearchAnn { annotations = podAnnotations @@ -244,6 +253,11 @@ func ShouldReload(config Config, resourceType string, annotations Map, podAnnota if foundExcludeSecret { isResourceExcluded = checkIfResourceIsExcluded(config.ResourceName, excludeSecretAnnotationValue) } + + case constants.SecretProviderClassEnvVarPostfix: + if foundExcludeSecretProviderClass { + isResourceExcluded = checkIfResourceIsExcluded(config.ResourceName, excludeSecretProviderClassProviderAnnotationValue) + } } if isResourceExcluded { @@ -252,15 +266,6 @@ func ShouldReload(config Config, resourceType string, annotations Map, podAnnota } } - reloaderEnabled, _ := strconv.ParseBool(reloaderEnabledValue) - typedAutoAnnotationEnabled, _ := strconv.ParseBool(typedAutoAnnotationEnabledValue) - if reloaderEnabled || typedAutoAnnotationEnabled || reloaderEnabledValue == "" && typedAutoAnnotationEnabledValue == "" && options.AutoReloadAll { - return ReloadCheckResult{ - ShouldReload: true, - AutoReload: true, - } - } - values := strings.Split(annotationValue, ",") for _, value := range values { value = strings.TrimSpace(value) @@ -283,6 +288,15 @@ func ShouldReload(config Config, resourceType string, annotations Map, podAnnota } } + reloaderEnabled, _ := strconv.ParseBool(reloaderEnabledValue) + typedAutoAnnotationEnabled, _ := strconv.ParseBool(typedAutoAnnotationEnabledValue) + if reloaderEnabled || typedAutoAnnotationEnabled || reloaderEnabledValue == "" && typedAutoAnnotationEnabledValue == "" && options.AutoReloadAll { + return ReloadCheckResult{ + ShouldReload: true, + AutoReload: true, + } + } + return ReloadCheckResult{ ShouldReload: false, } @@ -315,12 +329,15 @@ func GetCommandLineOptions() *ReloaderOptions { CommandLineOptions.AutoReloadAll = options.AutoReloadAll CommandLineOptions.ConfigmapUpdateOnChangeAnnotation = options.ConfigmapUpdateOnChangeAnnotation CommandLineOptions.SecretUpdateOnChangeAnnotation = options.SecretUpdateOnChangeAnnotation + CommandLineOptions.SecretProviderClassUpdateOnChangeAnnotation = options.SecretProviderClassUpdateOnChangeAnnotation CommandLineOptions.ReloaderAutoAnnotation = options.ReloaderAutoAnnotation CommandLineOptions.IgnoreResourceAnnotation = options.IgnoreResourceAnnotation CommandLineOptions.ConfigmapReloaderAutoAnnotation = options.ConfigmapReloaderAutoAnnotation CommandLineOptions.SecretReloaderAutoAnnotation = options.SecretReloaderAutoAnnotation + CommandLineOptions.SecretProviderClassReloaderAutoAnnotation = options.SecretProviderClassReloaderAutoAnnotation CommandLineOptions.ConfigmapExcludeReloaderAnnotation = options.ConfigmapExcludeReloaderAnnotation CommandLineOptions.SecretExcludeReloaderAnnotation = options.SecretExcludeReloaderAnnotation + CommandLineOptions.SecretProviderClassExcludeReloaderAnnotation = options.SecretProviderClassExcludeReloaderAnnotation CommandLineOptions.AutoSearchAnnotation = options.AutoSearchAnnotation CommandLineOptions.SearchMatchAnnotation = options.SearchMatchAnnotation CommandLineOptions.RolloutStrategyAnnotation = options.RolloutStrategyAnnotation @@ -331,6 +348,7 @@ func GetCommandLineOptions() *ReloaderOptions { CommandLineOptions.ReloadStrategy = options.ReloadStrategy CommandLineOptions.SyncAfterRestart = options.SyncAfterRestart CommandLineOptions.EnableHA = options.EnableHA + CommandLineOptions.EnableCSIIntegration = 
options.EnableCSIIntegration CommandLineOptions.WebhookUrl = options.WebhookUrl CommandLineOptions.ResourcesToIgnore = options.ResourcesToIgnore CommandLineOptions.WorkloadTypesToIgnore = options.WorkloadTypesToIgnore diff --git a/pkg/common/config.go b/pkg/common/config.go index 4227c2b..4421fa5 100644 --- a/pkg/common/config.go +++ b/pkg/common/config.go @@ -5,6 +5,7 @@ import ( "github.com/stakater/Reloader/internal/pkg/options" "github.com/stakater/Reloader/internal/pkg/util" v1 "k8s.io/api/core/v1" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" ) // Config contains rolling upgrade configuration parameters @@ -46,3 +47,16 @@ func GetSecretConfig(secret *v1.Secret) Config { Labels: secret.Labels, } } + +func GetSecretProviderClassPodStatusConfig(podStatus *csiv1.SecretProviderClassPodStatus) Config { + // As the CSI driver creates a SecretProviderClassPodStatus per pod, we build the config for the referenced SecretProviderClass instead + // ResourceAnnotations will be retrieved during PerformAction call + return Config{ + Namespace: podStatus.Namespace, + ResourceName: podStatus.Status.SecretProviderClassName, + Annotation: options.SecretProviderClassUpdateOnChangeAnnotation, + TypedAutoAnnotation: options.SecretProviderClassReloaderAutoAnnotation, + SHAValue: util.GetSHAfromSecretProviderClassPodStatus(podStatus.Status), + Type: constants.SecretProviderClassEnvVarPostfix, + } +} diff --git a/pkg/kube/client.go b/pkg/kube/client.go index 4230063..9582929 100644 --- a/pkg/kube/client.go +++ b/pkg/kube/client.go @@ -11,6 +11,7 @@ import ( "github.com/sirupsen/logrus" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" + csiclient "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned" ) // Clients struct exposes interfaces for kubernetes as well as openshift if available @@ -18,11 +19,14 @@ type Clients struct { KubernetesClient kubernetes.Interface OpenshiftAppsClient appsclient.Interface ArgoRolloutClient argorollout.Interface + CSIClient csiclient.Interface } var ( // IsOpenshift is true if environment is Openshift, it is false if environment is Kubernetes IsOpenshift = isOpenshift() + // IsCSIInstalled is true if environment has the secrets-store CSI driver installed, otherwise false + IsCSIInstalled = isCSIInstalled() ) // GetClients returns a `Clients` object containing both openshift and kubernetes clients with an openshift identifier @@ -48,10 +52,20 @@ func GetClients() Clients { logrus.Warnf("Unable to create ArgoRollout client error = %v", err) } + var csiClient *csiclient.Clientset + + if IsCSIInstalled { + csiClient, err = GetCSIClient() + if err != nil { + logrus.Warnf("Unable to create CSI client error = %v", err) + } + } + return Clients{ KubernetesClient: client, OpenshiftAppsClient: appsClient, ArgoRolloutClient: rolloutClient, + CSIClient: csiClient, } } @@ -63,6 +77,28 @@ func GetArgoRolloutClient() (*argorollout.Clientset, error) { return argorollout.NewForConfig(config) } +func isCSIInstalled() bool { + client, err := GetKubernetesClient() + if err != nil { + logrus.Fatalf("Unable to create Kubernetes client error = %v", err) + } + _, err = client.RESTClient().Get().AbsPath("/apis/secrets-store.csi.x-k8s.io/v1").Do(context.TODO()).Raw() + if err == nil { + logrus.Info("CSI provider is installed") + return true + } + logrus.Info("CSI provider is not installed") + return false +} + +func GetCSIClient() (*csiclient.Clientset, error) { + config, err := getConfig() + if err != nil { + return nil, err + } + return csiclient.NewForConfig(config) +} + func isOpenshift() bool { client, err :=
GetKubernetesClient() if err != nil { diff --git a/pkg/kube/resourcemapper.go b/pkg/kube/resourcemapper.go index 89ac2af..bdb7858 100644 --- a/pkg/kube/resourcemapper.go +++ b/pkg/kube/resourcemapper.go @@ -3,11 +3,13 @@ package kube import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + csiv1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" ) // ResourceMap are resources from where changes are going to be detected var ResourceMap = map[string]runtime.Object{ - "configmaps": &v1.ConfigMap{}, - "secrets": &v1.Secret{}, - "namespaces": &v1.Namespace{}, + "configmaps": &v1.ConfigMap{}, + "secrets": &v1.Secret{}, + "namespaces": &v1.Namespace{}, + "secretproviderclasspodstatuses": &csiv1.SecretProviderClassPodStatus{}, } diff --git a/test/loadtest/README.md b/test/loadtest/README.md new file mode 100644 index 0000000..7182bb3 --- /dev/null +++ b/test/loadtest/README.md @@ -0,0 +1,544 @@ +# Reloader Load Test Framework + +This framework provides A/B comparison testing between two Reloader container images. + +## Overview + +The load test framework: +1. Creates a local kind cluster (1 control-plane + 6 worker nodes) +2. Deploys Prometheus for metrics collection +3. Loads the provided Reloader container images into the cluster +4. Runs standardized test scenarios (S1-S13) +5. Collects metrics via Prometheus scraping +6. Generates comparison reports with pass/fail criteria + +## Prerequisites + +- Docker or Podman +- kind (Kubernetes in Docker) +- kubectl +- Go 1.22+ + +## Building + +```bash +cd test/loadtest +go build -o loadtest ./cmd/loadtest +``` + +## Quick Start + +```bash +# Compare two published images (e.g., different versions) +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 + +# Run a specific scenario +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 \ + --scenario=S2 \ + --duration=120 + +# Test only a single image (no comparison) +./loadtest run --new-image=myregistry/reloader:dev + +# Use local images built with docker/podman +./loadtest run \ + --old-image=localhost/reloader:baseline \ + --new-image=localhost/reloader:feature-branch + +# Skip cluster creation (use existing kind cluster) +./loadtest run \ + --old-image=stakater/reloader:v1.0.0 \ + --new-image=stakater/reloader:v1.1.0 \ + --skip-cluster + +# Run all scenarios in parallel on 4 clusters (faster execution) +./loadtest run \ + --new-image=localhost/reloader:dev \ + --parallelism=4 + +# Run all 13 scenarios in parallel (one cluster per scenario) +./loadtest run \ + --new-image=localhost/reloader:dev \ + --parallelism=13 + +# Generate report from existing results +./loadtest report --scenario=S2 --results-dir=./results +``` + +## Command Line Options + +### Run Command + +| Option | Description | Default | +|--------|-------------|---------| +| `--old-image=IMAGE` | Container image for "old" version | - | +| `--new-image=IMAGE` | Container image for "new" version | - | +| `--scenario=ID` | Test scenario: S1-S13 or "all" | all | +| `--duration=SECONDS` | Test duration in seconds | 60 | +| `--parallelism=N` | Run N scenarios in parallel on N kind clusters | 1 | +| `--skip-cluster` | Skip kind cluster creation (use existing, only for parallelism=1) | false | +| `--results-dir=DIR` | Directory for results | ./results | + +**Note:** At least one of `--old-image` or `--new-image` is required. Provide both for A/B comparison. 
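+The `run` and `report` commands chain naturally: `run` writes per-scenario results under `--results-dir`, and `report` renders a comparison from them afterwards. A minimal sketch using only the flags documented above (image tags are placeholders):
+
+```bash
+# A/B-compare one scenario, then render its report to a file
+./loadtest run \
+  --old-image=stakater/reloader:v1.0.0 \
+  --new-image=localhost/reloader:dev \
+  --scenario=S2 \
+  --duration=120 \
+  --results-dir=./results
+./loadtest report --scenario=S2 --results-dir=./results --output=report-S2.txt
+```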
+ +### Report Command + +| Option | Description | Default | +|--------|-------------|---------| +| `--scenario=ID` | Scenario to report on (required) | - | +| `--results-dir=DIR` | Directory containing results | ./results | +| `--output=FILE` | Output file (default: stdout) | - | + +## Test Scenarios + +| ID | Name | Description | +|-----|-----------------------|-------------------------------------------------| +| S1 | Burst Updates | Many ConfigMap/Secret updates in quick succession | +| S2 | Fan-Out | One ConfigMap used by many (50) workloads | +| S3 | High Cardinality | Many CMs/Secrets across many namespaces | +| S4 | No-Op Updates | Updates that don't change data (annotation only)| +| S5 | Workload Churn | Deployments created/deleted rapidly | +| S6 | Controller Restart | Restart controller pod under load | +| S7 | API Pressure | Many concurrent update requests | +| S8 | Large Objects | ConfigMaps > 100KB | +| S9 | Multi-Workload Types | Tests all workload types (Deploy, STS, DS) | +| S10 | Secrets + Mixed | Secrets and mixed ConfigMap+Secret workloads | +| S11 | Annotation Strategy | Tests `--reload-strategy=annotations` | +| S12 | Pause & Resume | Tests pause-period during rapid updates | +| S13 | Complex References | Init containers, valueFrom, projected volumes | + +## Metrics Reference + +This section explains each metric collected during load tests, what it measures, and what different values might indicate. + +### Counter Metrics (Totals) + +#### `reconcile_total` +**What it measures:** The total number of reconciliation loops executed by the controller. + +**What it indicates:** +- **Higher in new vs old:** The new controller-runtime implementation may batch events differently. This is often expected behavior, not a problem. +- **Lower in new vs old:** Better event batching/deduplication. Controller-runtime's work queue naturally deduplicates events. +- **Expected behavior:** The new implementation typically has *fewer* reconciles due to intelligent event batching. + +#### `action_total` +**What it measures:** The total number of reload actions triggered (rolling restarts of Deployments/StatefulSets/DaemonSets). + +**What it indicates:** +- **Should match expected value:** Both implementations should trigger the same number of reloads for the same workload. +- **Lower than expected:** Some updates were missed - potential bug or race condition. +- **Higher than expected:** Duplicate reloads triggered - inefficiency but not data loss. + +#### `reload_executed_total` +**What it measures:** Successful reload operations executed, labeled by `success=true/false`. + +**What it indicates:** +- **`success=true` count:** Number of workloads successfully restarted. +- **`success=false` count:** Failed restart attempts (API errors, permission issues). +- **Should match `action_total`:** If significantly lower, reloads are failing. + +#### `workloads_scanned_total` +**What it measures:** Number of workloads (Deployments, etc.) scanned when checking for ConfigMap/Secret references. + +**What it indicates:** +- **High count:** Controller is scanning many workloads per reconcile. +- **Expected behavior:** Should roughly match the number of workloads × number of reconciles. +- **Optimization signal:** If very high, namespace filtering or label selectors could help. + +#### `workloads_matched_total` +**What it measures:** Number of workloads that matched (reference the changed ConfigMap/Secret). 
+ +**What it indicates:** +- **Should match `reload_executed_total`:** Every matched workload should be reloaded. +- **Higher than reloads:** Some matched workloads weren't reloaded (potential issue). + +#### `errors_total` +**What it measures:** Total errors encountered, labeled by error type. + +**What it indicates:** +- **Should be 0:** Any errors indicate problems. +- **Common causes:** API server timeouts, RBAC issues, resource conflicts. +- **Critical metric:** Non-zero errors in production should be investigated. + +### API Efficiency Metrics (REST Client) + +These metrics track Kubernetes API server calls made by Reloader. Lower values indicate more efficient operation with less API server load. + +#### `rest_client_requests_total` +**What it measures:** Total number of HTTP requests made to the Kubernetes API server. + +**What it indicates:** +- **Lower is better:** Fewer API calls means less load on the API server. +- **High count:** May indicate inefficient caching or excessive reconciles. +- **Comparison use:** Shows overall API efficiency between implementations. + +#### `rest_client_requests_get` +**What it measures:** Number of GET requests (fetching individual resources or listings). + +**What it indicates:** +- **Includes:** Fetching ConfigMaps, Secrets, Deployments, etc. +- **Higher count:** More frequent resource fetching, possibly due to cache misses. +- **Expected behavior:** Controller-runtime's caching should reduce GET requests compared to direct API calls. + +#### `rest_client_requests_patch` +**What it measures:** Number of PATCH requests (partial updates to resources). + +**What it indicates:** +- **Used for:** Rolling restart annotations on workloads. +- **Should correlate with:** `reload_executed_total` - each reload typically requires one PATCH. +- **Lower is better:** Fewer patches means more efficient batching or deduplication. + +#### `rest_client_requests_put` +**What it measures:** Number of PUT requests (full resource updates). + +**What it indicates:** +- **Used for:** Full object replacements (less common than PATCH). +- **Should be low:** Most updates use PATCH for efficiency. +- **High count:** May indicate suboptimal update strategy. + +#### `rest_client_requests_errors` +**What it measures:** Number of failed API requests (4xx/5xx responses). + +**What it indicates:** +- **Should be 0:** Errors indicate API server issues or permission problems. +- **Common causes:** Rate limiting, RBAC issues, resource conflicts, network issues. +- **Non-zero:** Investigate API server logs and Reloader permissions. + +### Latency Metrics (Percentiles) + +All latency metrics are reported in **seconds**. The report shows p50 (median), p95, and p99 percentiles. + +#### `reconcile_duration (s)` +**What it measures:** Time spent inside each reconcile loop, from start to finish. + +**What it indicates:** +- **p50 (median):** Typical reconcile time. Should be < 100ms for good performance. +- **p95:** 95th percentile - only 5% of reconciles take longer than this. +- **p99:** 99th percentile - indicates worst-case performance. + +**Interpreting differences:** +- **New higher than old:** Controller-runtime reconciles may do more work per loop but run fewer times. Check `reconcile_total` - if it's lower, this is expected. +- **Minor differences (< 0.5s absolute):** Not significant for sub-second values. + +#### `action_latency (s)` +**What it measures:** End-to-end time from ConfigMap/Secret change detection to workload restart triggered. 
+ +**What it indicates:** +- **This is the user-facing latency:** How long users wait for their config changes to take effect. +- **p50 < 1s:** Excellent - most changes apply within a second. +- **p95 < 5s:** Good - even under load, changes apply quickly. +- **p99 > 10s:** May need investigation - some changes take too long. + +**What affects this:** +- API server responsiveness +- Number of workloads to scan +- Concurrent updates competing for resources + +### Understanding the Report + +#### Report Columns + +``` +Metric Old New Expected Old✓ New✓ Status +------ --- --- -------- ---- ---- ------ +action_total 100.00 100.00 100 ✓ ✓ pass +action_latency_p95 (s) 0.15 0.04 - - - pass +``` + +- **Old/New:** Measured values from each implementation +- **Expected:** Known expected value (for throughput metrics) +- **Old✓/New✓:** Whether the value is within 15% of expected (✓ = yes, ✗ = no, - = no expected value) +- **Status:** pass/fail based on comparison thresholds + +#### Pass/Fail Logic + +| Metric Type | Pass Condition | +|-------------|----------------| +| Throughput (action_total, reload_executed_total) | New value within 15% of expected | +| Latency (p50, p95, p99) | New not more than threshold% worse than old, OR absolute difference < minimum threshold | +| Errors | New ≤ Old (ideally both 0) | +| API Efficiency (rest_client_requests_*) | New ≤ Old (lower is better), or New not more than 50% higher | + +#### Latency Thresholds + +Latency comparisons use both percentage AND absolute thresholds to avoid false failures: + +| Metric | Max % Worse | Min Absolute Diff | +|--------|-------------|-------------------| +| p50 | 100% | 0.5s | +| p95 | 100% | 1.0s | +| p99 | 100% | 1.0s | + +**Example:** If old p50 = 0.01s and new p50 = 0.08s: +- Percentage difference: +700% (would fail % check) +- Absolute difference: 0.07s (< 0.5s threshold) +- **Result: PASS** (both values are fast enough that the difference doesn't matter) + +### Resource Consumption Metrics + +These metrics track CPU, memory, and Go runtime resource usage. Lower values generally indicate more efficient operation. 
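+The tables below are computed from Prometheus samples taken during the run, but you can also spot-check usage live while a scenario executes. A sketch, assuming the two Reloader deployments run in the `reloader-old` and `reloader-new` namespaces (`reloader-new` appears in the Investigation section below; `reloader-old` is inferred by symmetry):
+
+```bash
+# Watch CPU/memory of both Reloader pods during a test (requires metrics-server)
+watch -n 5 'kubectl top pods -n reloader-old; kubectl top pods -n reloader-new'
+```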
+ +#### Memory Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `memory_rss_mb_avg` | Average RSS (resident set size) memory | MB | +| `memory_rss_mb_max` | Peak RSS memory during test | MB | +| `memory_heap_mb_avg` | Average Go heap allocation | MB | +| `memory_heap_mb_max` | Peak Go heap allocation | MB | + +**What to watch for:** +- **High RSS:** May indicate memory leaks or inefficient caching +- **High heap:** Many objects being created (check GC metrics) +- **Growing over time:** Potential memory leak + +#### CPU Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `cpu_cores_avg` | Average CPU usage rate | cores | +| `cpu_cores_max` | Peak CPU usage rate | cores | + +**What to watch for:** +- **High CPU:** Inefficient algorithms or excessive reconciles +- **Spiky max:** May indicate burst handling issues + +#### Go Runtime Metrics + +| Metric | Description | Unit | +|--------|-------------|------| +| `goroutines_avg` | Average goroutine count | count | +| `goroutines_max` | Peak goroutine count | count | +| `gc_pause_p99_ms` | 99th percentile GC pause time | ms | + +**What to watch for:** +- **High goroutines:** Potential goroutine leak or unbounded concurrency +- **High GC pause:** Large heap or allocation pressure + +### Scenario-Specific Expectations + +| Scenario | Key Metrics to Watch | Expected Behavior | +|----------|---------------------|-------------------| +| S1 (Burst) | action_latency_p99, cpu_cores_max, goroutines_max | Should handle bursts without queue backup | +| S2 (Fan-Out) | reconcile_total, workloads_matched, memory_rss_mb_max | One CM change → 50 workload reloads | +| S3 (High Cardinality) | reconcile_duration, memory_heap_mb_avg | Many namespaces shouldn't increase memory | +| S4 (No-Op) | action_total = 0, cpu_cores_avg should be low | Minimal resource usage for no-op | +| S5 (Churn) | errors_total, goroutines_avg | Graceful handling, no goroutine leak | +| S6 (Restart) | All metrics captured | Metrics survive controller restart | +| S7 (API Pressure) | errors_total, cpu_cores_max, goroutines_max | No errors under concurrent load | +| S8 (Large Objects) | memory_rss_mb_max, gc_pause_p99_ms | Large ConfigMaps don't cause OOM or GC issues | +| S9 (Multi-Workload) | reload_executed_total per type | All workload types (Deploy, STS, DS) reload | +| S10 (Secrets) | reload_executed_total, workloads_matched | Both Secrets and ConfigMaps trigger reloads | +| S11 (Annotation) | workload annotations present | Deployments get `last-reloaded-from` annotation | +| S12 (Pause) | reload_executed_total << updates | Pause-period reduces reload frequency | +| S13 (Complex) | reload_executed_total | All reference types trigger reloads | + +### Troubleshooting + +#### New implementation shows 0 for all metrics +- Check if Prometheus is scraping the new Reloader pod +- Verify pod annotations: `prometheus.io/scrape: "true"` +- Check Prometheus targets: `http://localhost:9091/targets` + +#### Metrics don't match expected values +- Verify test ran to completion (check logs) +- Ensure Prometheus scraped final metrics (18s wait after test) +- Check for pod restarts during test (metrics reset on restart - handled by `increase()`) + +#### High latency in new implementation +- Check Reloader pod resource limits +- Look for API server throttling in logs +- Compare `reconcile_total` - fewer reconciles with higher duration may be normal + +#### REST client errors are non-zero +- **Common causes:** + - Optional CRD schemes registered but 
CRDs not installed (e.g., Argo Rollouts, OpenShift DeploymentConfig)
+  - API server rate limiting under high load
+  - RBAC permissions missing for certain resource types
+- **Argo Rollouts errors:** If you see ~4 errors per test, set `--enable-argo-rollouts=false` unless you actually use Argo Rollouts
+- **OpenShift errors:** Similarly, disable DeploymentConfig support on non-OpenShift clusters
+
+#### REST client requests much higher in new implementation
+- Check if caching is working correctly
+- Look for excessive re-queuing in controller logs
+- Compare `reconcile_total` - more reconciles naturally mean more API calls
+
+## Report Format
+
+The report generator produces a comparison table with units and expected-value indicators:
+
+```
+================================================================================
+                        RELOADER A/B COMPARISON REPORT
+================================================================================
+
+Scenario: S2
+Generated: 2026-01-03 14:30:00
+Status: PASS
+Summary: All metrics within acceptable thresholds
+
+Test: S2: Fan-out test - 1 CM update triggers 50 deployment reloads
+
+--------------------------------------------------------------------------------
+                              METRIC COMPARISONS
+--------------------------------------------------------------------------------
+(Old✓/New✓ = meets expected value within 15%)
+
+Metric                             Old      New      Expected  Old✓  New✓  Status
+------                             ---      ---      --------  ----  ----  ------
+reconcile_total                    50.00    25.00    -         -     -     pass
+reconcile_duration_p50 (s)         0.01     0.05     -         -     -     pass
+reconcile_duration_p95 (s)         0.02     0.15     -         -     -     pass
+action_total                       50.00    50.00    50        ✓     ✓     pass
+action_latency_p50 (s)             0.05     0.03     -         -     -     pass
+action_latency_p95 (s)             0.12     0.08     -         -     -     pass
+errors_total                       0.00     0.00    -         -     -     pass
+reload_executed_total              50.00    50.00    50        ✓     ✓     pass
+workloads_scanned_total            50.00    50.00    50        ✓     ✓     pass
+workloads_matched_total            50.00    50.00    50        ✓     ✓     pass
+rest_client_requests_total         850      720      -         -     -     pass
+rest_client_requests_get           500      420      -         -     -     pass
+rest_client_requests_patch         300      250      -         -     -     pass
+rest_client_requests_errors        0        0        -         -     -     pass
+```
+
+Reports are saved to `results/<scenario>/report.txt` after each test.
+
+## Directory Structure
+
+```
+test/loadtest/
+├── cmd/
+│   └── loadtest/              # Unified CLI (run + report)
+│       └── main.go
+├── internal/
+│   ├── cluster/               # Kind cluster management
+│   │   └── kind.go
+│   ├── prometheus/            # Prometheus deployment & querying
+│   │   └── prometheus.go
+│   ├── reloader/              # Reloader deployment
+│   │   └── deploy.go
+│   └── scenarios/             # Test scenario implementations
+│       └── scenarios.go
+├── manifests/
+│   └── prometheus.yaml        # Prometheus deployment manifest
+├── results/                   # Generated after tests
+│   └── <scenario>/
+│       ├── old/               # Old version data
+│       │   ├── *.json         # Prometheus metric snapshots
+│       │   └── reloader.log   # Reloader pod logs
+│       ├── new/               # New version data
+│       │   ├── *.json         # Prometheus metric snapshots
+│       │   └── reloader.log   # Reloader pod logs
+│       ├── expected.json      # Expected values from test
+│       └── report.txt         # Comparison report
+├── go.mod
+├── go.sum
+└── README.md
+```
+
+## Building Local Images for Testing
+
+If you want to test local code changes:
+
+```bash
+# Build the new Reloader image from current source
+docker build -t localhost/reloader:dev -f Dockerfile .
+
+# Build from a different branch/commit
+git checkout feature-branch
+docker build -t localhost/reloader:feature -f Dockerfile .
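+
+# Optional: return to the branch you started from before running the comparison
+git checkout -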
+
+# Then run comparison
+./loadtest run \
+  --old-image=stakater/reloader:v1.0.0 \
+  --new-image=localhost/reloader:feature
+```
+
+## Interpreting Results
+
+### PASS
+All metrics are within acceptable thresholds. The new implementation is comparable to or better than the old one.
+
+### FAIL
+One or more metrics exceeded thresholds. Review the specific metrics:
+- **Latency degradation**: p95/p99 latencies are significantly higher
+- **Missed reloads**: `reload_executed_total` differs significantly
+- **Errors increased**: `errors_total` is higher in the new version
+
+### Investigation
+
+If tests fail, check:
+1. Pod logs: `kubectl logs -n reloader-new deployment/reloader` (or check `results/<scenario>/new/reloader.log`)
+2. Resource usage: `kubectl top pods -n reloader-new`
+3. Events: `kubectl get events -n reloader-test`
+
+## Parallel Execution
+
+The `--parallelism` option runs scenarios on multiple kind clusters simultaneously, significantly reducing total test time.
+
+### How It Works
+
+1. **Multiple Clusters**: Creates N kind clusters named `reloader-loadtest-0`, `reloader-loadtest-1`, etc.
+2. **Separate Prometheus**: Each cluster gets its own Prometheus instance with a unique port (9091, 9092, etc.)
+3. **Worker Pool**: Scenarios are distributed to workers via a channel, with each worker running on its own cluster
+4. **Independent Execution**: Each scenario runs in complete isolation with no resource contention
+
+### Usage
+
+```bash
+# Run 4 scenarios at a time (creates 4 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=4
+
+# Run all 13 scenarios in parallel (creates 13 clusters)
+./loadtest run --new-image=my-image:tag --parallelism=13 --scenario=all
+```
+
+### Resource Requirements
+
+Parallel execution requires significant system resources:
+
+| Parallelism | Clusters | Est. Memory | Est. CPU |
+|-------------|----------|-------------|----------|
+| 1 (default) | 1 | ~4GB | 2-4 cores |
+| 4 | 4 | ~16GB | 8-16 cores |
+| 13 | 13 | ~52GB | 26-52 cores |
+
+### Notes
+
+- The `--skip-cluster` option is not supported with parallelism > 1
+- Each worker loads images independently, so initial setup takes longer
+- All results are written to the same `--results-dir`, with per-scenario subdirectories
+- If a cluster setup fails, the remaining workers continue with the available clusters
+- Parallelism is automatically reduced to the scenario count if set higher
+
+## CI Integration
+
+### GitHub Actions
+
+Load tests can be triggered on pull requests by commenting `/loadtest`:
+
+```
+/loadtest
+```
+
+This will:
+1. Build a container image from the PR branch
+2. Run all load test scenarios against it
+3. Post results as a PR comment
+4. Upload detailed results as artifacts
+
+### Make Target
+
+Run load tests locally or in CI:
+
+```bash
+# From repository root
+make loadtest
+```
+
+This builds the container image and runs all scenarios with a 60-second duration.
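+
+For reference, the make target is roughly equivalent to the following sketch (the exact recipe lives in the repository Makefile):
+
+```bash
+# Build the image under test from the current checkout
+docker build -t localhost/reloader:new .
+
+# Build the loadtest CLI, then run every scenario for 60 seconds each
+cd test/loadtest
+go build -o loadtest ./cmd/loadtest
+./loadtest run --new-image=localhost/reloader:new --scenario=all --duration=60
+```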
diff --git a/test/loadtest/cmd/loadtest/main.go b/test/loadtest/cmd/loadtest/main.go new file mode 100644 index 0000000..510ce0b --- /dev/null +++ b/test/loadtest/cmd/loadtest/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/stakater/Reloader/test/loadtest/internal/cmd" + +func main() { + cmd.Execute() +} diff --git a/test/loadtest/go.mod b/test/loadtest/go.mod new file mode 100644 index 0000000..e96ed76 --- /dev/null +++ b/test/loadtest/go.mod @@ -0,0 +1,52 @@ +module github.com/stakater/Reloader/test/loadtest + +go 1.25 + +require ( + github.com/spf13/cobra v1.8.1 + k8s.io/api v0.31.0 + k8s.io/apimachinery v0.31.0 + k8s.io/client-go v0.31.0 +) + +require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.6 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/test/loadtest/go.sum b/test/loadtest/go.sum new file mode 100644 index 0000000..f4f0ad8 --- /dev/null +++ b/test/loadtest/go.sum @@ -0,0 +1,160 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= +github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools 
v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo= +k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE= +k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc= +k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= +k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/test/loadtest/internal/cluster/kind.go b/test/loadtest/internal/cluster/kind.go new file mode 100644 index 0000000..1fde314 --- /dev/null +++ 
b/test/loadtest/internal/cluster/kind.go @@ -0,0 +1,314 @@ +// Package cluster provides kind cluster management functionality. +package cluster + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "strings" + "time" +) + +// Config holds configuration for kind cluster operations. +type Config struct { + Name string + ContainerRuntime string // "docker" or "podman" + PortOffset int // Offset for host port mappings (for parallel clusters) +} + +// Manager handles kind cluster operations. +type Manager struct { + cfg Config +} + +// NewManager creates a new cluster manager. +func NewManager(cfg Config) *Manager { + return &Manager{cfg: cfg} +} + +// DetectContainerRuntime finds available container runtime. +// It checks if the runtime daemon is actually running, not just if the binary exists. +func DetectContainerRuntime() (string, error) { + if _, err := exec.LookPath("docker"); err == nil { + cmd := exec.Command("docker", "info") + if err := cmd.Run(); err == nil { + return "docker", nil + } + } + if _, err := exec.LookPath("podman"); err == nil { + cmd := exec.Command("podman", "info") + if err := cmd.Run(); err == nil { + return "podman", nil + } + } + return "", fmt.Errorf("neither docker nor podman is running") +} + +// Exists checks if the cluster already exists. +func (m *Manager) Exists() bool { + cmd := exec.Command("kind", "get", "clusters") + out, err := cmd.Output() + if err != nil { + return false + } + for _, line := range strings.Split(string(out), "\n") { + if strings.TrimSpace(line) == m.cfg.Name { + return true + } + } + return false +} + +// Delete deletes the kind cluster. +func (m *Manager) Delete(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", m.cfg.Name) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// Create creates a new kind cluster with optimized settings. 
+func (m *Manager) Create(ctx context.Context) error { + if m.cfg.ContainerRuntime == "podman" { + os.Setenv("KIND_EXPERIMENTAL_PROVIDER", "podman") + } + + if m.Exists() { + fmt.Printf("Cluster %s already exists, deleting...\n", m.cfg.Name) + if err := m.Delete(ctx); err != nil { + return fmt.Errorf("deleting existing cluster: %w", err) + } + } + + httpPort := 8080 + m.cfg.PortOffset + httpsPort := 8443 + m.cfg.PortOffset + + config := fmt.Sprintf(`kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +networking: + podSubnet: "10.244.0.0/16" + serviceSubnet: "10.96.0.0/16" +nodes: +- role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" + - | + kind: ClusterConfiguration + apiServer: + extraArgs: + max-requests-inflight: "800" + max-mutating-requests-inflight: "400" + watch-cache-sizes: "configmaps#1000,secrets#1000,pods#1000" + controllerManager: + extraArgs: + kube-api-qps: "200" + kube-api-burst: "200" + scheduler: + extraArgs: + kube-api-qps: "200" + kube-api-burst: "200" + extraPortMappings: + - containerPort: 80 + hostPort: %d + protocol: TCP + - containerPort: 443 + hostPort: %d + protocol: TCP +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +- role: worker + kubeadmConfigPatches: + - | + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: + max-pods: "250" + kube-api-qps: "50" + kube-api-burst: "100" + serialize-image-pulls: "false" + event-qps: "50" + event-burst: "100" +`, httpPort, httpsPort) + cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", m.cfg.Name, "--config=-") + cmd.Stdin = strings.NewReader(config) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// GetKubeconfig returns the kubeconfig for the cluster. +func (m *Manager) GetKubeconfig() (string, error) { + cmd := exec.Command("kind", "get", "kubeconfig", "--name", m.cfg.Name) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("getting kubeconfig: %w", err) + } + return string(out), nil +} + +// Context returns the kubectl context name for this cluster. +func (m *Manager) Context() string { + return "kind-" + m.cfg.Name +} + +// Name returns the cluster name. 
+func (m *Manager) Name() string { + return m.cfg.Name +} + +// LoadImage loads a container image into the kind cluster. +func (m *Manager) LoadImage(ctx context.Context, image string) error { + if !m.imageExistsLocally(image) { + fmt.Printf(" Image not found locally, pulling: %s\n", image) + pullCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image) + pullCmd.Stdout = os.Stdout + pullCmd.Stderr = os.Stderr + if err := pullCmd.Run(); err != nil { + return fmt.Errorf("pulling image %s: %w", image, err) + } + } else { + fmt.Printf(" Image found locally: %s\n", image) + } + + fmt.Printf(" Copying image to kind cluster...\n") + + if m.cfg.ContainerRuntime == "podman" { + tmpFile := fmt.Sprintf("/tmp/kind-image-%d.tar", time.Now().UnixNano()) + defer os.Remove(tmpFile) + + saveCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "save", image, "-o", tmpFile) + if err := saveCmd.Run(); err != nil { + return fmt.Errorf("saving image %s: %w", image, err) + } + + loadCmd := exec.CommandContext(ctx, "kind", "load", "image-archive", tmpFile, "--name", m.cfg.Name) + loadCmd.Stdout = os.Stdout + loadCmd.Stderr = os.Stderr + if err := loadCmd.Run(); err != nil { + return fmt.Errorf("loading image archive: %w", err) + } + } else { + loadCmd := exec.CommandContext(ctx, "kind", "load", "docker-image", image, "--name", m.cfg.Name) + loadCmd.Stdout = os.Stdout + loadCmd.Stderr = os.Stderr + if err := loadCmd.Run(); err != nil { + return fmt.Errorf("loading image %s: %w", image, err) + } + } + + return nil +} + +// imageExistsLocally checks if an image exists in the local container runtime. +func (m *Manager) imageExistsLocally(image string) bool { + cmd := exec.Command(m.cfg.ContainerRuntime, "image", "exists", image) + if err := cmd.Run(); err == nil { + return true + } + + cmd = exec.Command(m.cfg.ContainerRuntime, "image", "inspect", image) + if err := cmd.Run(); err == nil { + return true + } + + cmd = exec.Command(m.cfg.ContainerRuntime, "images", "--format", "{{.Repository}}:{{.Tag}}") + out, err := cmd.Output() + if err == nil { + for _, line := range strings.Split(string(out), "\n") { + if strings.TrimSpace(line) == image { + return true + } + } + } + + return false +} + +// PullImage pulls an image using the container runtime. +func (m *Manager) PullImage(ctx context.Context, image string) error { + cmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// ExecKubectl runs a kubectl command against the cluster. +func (m *Manager) ExecKubectl(ctx context.Context, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, "kubectl", args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("%w: %s", err, stderr.String()) + } + return stdout.Bytes(), nil +} diff --git a/test/loadtest/internal/cmd/report.go b/test/loadtest/internal/cmd/report.go new file mode 100644 index 0000000..7bf4cc6 --- /dev/null +++ b/test/loadtest/internal/cmd/report.go @@ -0,0 +1,860 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "log" + "math" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/spf13/cobra" +) + +var ( + reportScenario string + reportResultsDir string + reportOutputFile string + reportFormat string +) + +var reportCmd = &cobra.Command{ + Use: "report", + Short: "Generate comparison report for a scenario", + Long: `Generate a detailed report for a specific test scenario. 
+ +Examples: + # Generate report for a scenario + loadtest report --scenario=S2 --results-dir=./results + + # Generate JSON report + loadtest report --scenario=S2 --format=json`, + Run: func(cmd *cobra.Command, args []string) { + reportCommand() + }, +} + +func init() { + reportCmd.Flags().StringVar(&reportScenario, "scenario", "", "Scenario to report on (required)") + reportCmd.Flags().StringVar(&reportResultsDir, "results-dir", "./results", "Directory containing results") + reportCmd.Flags().StringVar(&reportOutputFile, "output", "", "Output file (default: stdout)") + reportCmd.Flags().StringVar(&reportFormat, "format", "text", "Output format: text, json, markdown") + reportCmd.MarkFlagRequired("scenario") +} + +// PrometheusResponse represents a Prometheus API response for report parsing. +type PrometheusResponse struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric map[string]string `json:"metric"` + Value []interface{} `json:"value"` + } `json:"result"` + } `json:"data"` +} + +// MetricComparison represents the comparison of a single metric. +type MetricComparison struct { + Name string `json:"name"` + DisplayName string `json:"display_name"` + Unit string `json:"unit"` + IsCounter bool `json:"is_counter"` + OldValue float64 `json:"old_value"` + NewValue float64 `json:"new_value"` + Expected float64 `json:"expected"` + Difference float64 `json:"difference"` + DiffPct float64 `json:"diff_pct"` + Status string `json:"status"` + Threshold float64 `json:"threshold"` + OldMeetsExpected string `json:"old_meets_expected"` + NewMeetsExpected string `json:"new_meets_expected"` +} + +type metricInfo struct { + unit string + isCounter bool +} + +var metricInfoMap = map[string]metricInfo{ + "reconcile_total": {unit: "count", isCounter: true}, + "reconcile_duration_p50": {unit: "s", isCounter: false}, + "reconcile_duration_p95": {unit: "s", isCounter: false}, + "reconcile_duration_p99": {unit: "s", isCounter: false}, + "action_total": {unit: "count", isCounter: true}, + "action_latency_p50": {unit: "s", isCounter: false}, + "action_latency_p95": {unit: "s", isCounter: false}, + "action_latency_p99": {unit: "s", isCounter: false}, + "errors_total": {unit: "count", isCounter: true}, + "reload_executed_total": {unit: "count", isCounter: true}, + "workloads_scanned_total": {unit: "count", isCounter: true}, + "workloads_matched_total": {unit: "count", isCounter: true}, + "skipped_total_no_data_change": {unit: "count", isCounter: true}, + "rest_client_requests_total": {unit: "count", isCounter: true}, + "rest_client_requests_get": {unit: "count", isCounter: true}, + "rest_client_requests_patch": {unit: "count", isCounter: true}, + "rest_client_requests_put": {unit: "count", isCounter: true}, + "rest_client_requests_errors": {unit: "count", isCounter: true}, + "memory_rss_mb_avg": {unit: "MB", isCounter: false}, + "memory_rss_mb_max": {unit: "MB", isCounter: false}, + "memory_heap_mb_avg": {unit: "MB", isCounter: false}, + "memory_heap_mb_max": {unit: "MB", isCounter: false}, + "cpu_cores_avg": {unit: "cores", isCounter: false}, + "cpu_cores_max": {unit: "cores", isCounter: false}, + "goroutines_avg": {unit: "count", isCounter: false}, + "goroutines_max": {unit: "count", isCounter: false}, + "gc_pause_p99_ms": {unit: "ms", isCounter: false}, +} + +// ReportExpectedMetrics matches the expected metrics from test scenarios. 
+type ReportExpectedMetrics struct { + ActionTotal int `json:"action_total"` + ReloadExecutedTotal int `json:"reload_executed_total"` + ReconcileTotal int `json:"reconcile_total"` + WorkloadsScannedTotal int `json:"workloads_scanned_total"` + WorkloadsMatchedTotal int `json:"workloads_matched_total"` + SkippedTotal int `json:"skipped_total"` + Description string `json:"description"` +} + +// ScenarioReport represents the full report for a scenario. +type ScenarioReport struct { + Scenario string `json:"scenario"` + Timestamp time.Time `json:"timestamp"` + Comparisons []MetricComparison `json:"comparisons"` + OverallStatus string `json:"overall_status"` + Summary string `json:"summary"` + PassCriteria []string `json:"pass_criteria"` + FailedCriteria []string `json:"failed_criteria"` + Expected ReportExpectedMetrics `json:"expected"` + TestDescription string `json:"test_description"` +} + +// MetricType defines how to evaluate a metric. +type MetricType int + +const ( + LowerIsBetter MetricType = iota + ShouldMatch + HigherIsBetter + Informational +) + +type thresholdConfig struct { + maxDiff float64 + metricType MetricType + minAbsDiff float64 +} + +var thresholds = map[string]thresholdConfig{ + "reconcile_total": {maxDiff: 60.0, metricType: LowerIsBetter}, + "reconcile_duration_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5}, + "reconcile_duration_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "reconcile_duration_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "action_latency_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5}, + "action_latency_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "action_latency_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0}, + "errors_total": {maxDiff: 0.0, metricType: LowerIsBetter}, + "action_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "reload_executed_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "workloads_scanned_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "workloads_matched_total": {maxDiff: 15.0, metricType: ShouldMatch}, + "skipped_total_no_data_change": {maxDiff: 20.0, metricType: ShouldMatch}, + "rest_client_requests_total": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_get": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_patch": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50}, + "rest_client_requests_put": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "rest_client_requests_errors": {maxDiff: 0.0, metricType: LowerIsBetter, minAbsDiff: 100}, + "memory_rss_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "memory_rss_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 30}, + "memory_heap_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 15}, + "memory_heap_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20}, + "cpu_cores_avg": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.1}, + "cpu_cores_max": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.2}, + "goroutines_avg": {metricType: Informational}, + "goroutines_max": {metricType: Informational}, + "gc_pause_p99_ms": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 5}, +} + +func reportCommand() { + if reportScenario == "" { + log.Fatal("--scenario is required for report command") + } + + report, err := generateScenarioReport(reportScenario, reportResultsDir) + if err != 
nil { + log.Fatalf("Failed to generate report: %v", err) + } + + var output string + switch OutputFormat(reportFormat) { + case OutputFormatJSON: + output = renderScenarioReportJSON(report) + case OutputFormatMarkdown: + output = renderScenarioReportMarkdown(report) + default: + output = renderScenarioReport(report) + } + + if reportOutputFile != "" { + if err := os.WriteFile(reportOutputFile, []byte(output), 0644); err != nil { + log.Fatalf("Failed to write output file: %v", err) + } + log.Printf("Report written to %s", reportOutputFile) + } else { + fmt.Println(output) + } +} + +func generateScenarioReport(scenario, resultsDir string) (*ScenarioReport, error) { + oldDir := filepath.Join(resultsDir, scenario, "old") + newDir := filepath.Join(resultsDir, scenario, "new") + scenarioDir := filepath.Join(resultsDir, scenario) + + _, oldErr := os.Stat(oldDir) + _, newErr := os.Stat(newDir) + hasOld := oldErr == nil + hasNew := newErr == nil + isComparison := hasOld && hasNew + + singleVersion := "" + singleDir := "" + if !isComparison { + if hasNew { + singleVersion = "new" + singleDir = newDir + } else if hasOld { + singleVersion = "old" + singleDir = oldDir + } else { + return nil, fmt.Errorf("no results found in %s", scenarioDir) + } + } + + report := &ScenarioReport{ + Scenario: scenario, + Timestamp: time.Now(), + } + + expectedPath := filepath.Join(scenarioDir, "expected.json") + if data, err := os.ReadFile(expectedPath); err == nil { + if err := json.Unmarshal(data, &report.Expected); err != nil { + log.Printf("Warning: Could not parse expected metrics: %v", err) + } else { + report.TestDescription = report.Expected.Description + } + } + + if !isComparison { + return generateSingleVersionReport(report, singleDir, singleVersion, scenario) + } + + metricsToCompare := []struct { + name string + file string + selector func(data PrometheusResponse) float64 + }{ + {"reconcile_total", "reloader_reconcile_total.json", sumAllValues}, + {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue}, + {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue}, + {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue}, + {"action_total", "reloader_action_total.json", sumAllValues}, + {"action_latency_p50", "action_p50.json", getFirstValue}, + {"action_latency_p95", "action_p95.json", getFirstValue}, + {"action_latency_p99", "action_p99.json", getFirstValue}, + {"errors_total", "reloader_errors_total.json", sumAllValues}, + {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues}, + {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues}, + {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues}, + {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue}, + {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue}, + {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue}, + {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue}, + {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue}, + {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB}, + {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB}, + {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB}, + {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB}, + {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue}, + {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue}, + 
{"goroutines_avg", "goroutines_avg.json", getFirstValue}, + {"goroutines_max", "goroutines_max.json", getFirstValue}, + {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs}, + } + + expectedValues := map[string]float64{ + "action_total": float64(report.Expected.ActionTotal), + "reload_executed_total": float64(report.Expected.ReloadExecutedTotal), + "reconcile_total": float64(report.Expected.ReconcileTotal), + "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal), + "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal), + "skipped_total": float64(report.Expected.SkippedTotal), + } + + metricValues := make(map[string]struct{ old, new, expected float64 }) + + for _, m := range metricsToCompare { + oldData, err := loadMetricFile(filepath.Join(oldDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load old metric %s: %v", m.name, err) + continue + } + + newData, err := loadMetricFile(filepath.Join(newDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load new metric %s: %v", m.name, err) + continue + } + + oldValue := m.selector(oldData) + newValue := m.selector(newData) + expected := expectedValues[m.name] + + metricValues[m.name] = struct{ old, new, expected float64 }{oldValue, newValue, expected} + } + + newMeetsActionExpected := false + newReconcileIsZero := false + isChurnScenario := scenario == "S5" + if v, ok := metricValues["action_total"]; ok && v.expected > 0 { + tolerance := v.expected * 0.15 + newMeetsActionExpected = math.Abs(v.new-v.expected) <= tolerance + } + if v, ok := metricValues["reconcile_total"]; ok { + newReconcileIsZero = v.new == 0 + } + + for _, m := range metricsToCompare { + v, ok := metricValues[m.name] + if !ok { + continue + } + + comparison := compareMetricWithExpected(m.name, v.old, v.new, v.expected) + + if strings.HasPrefix(m.name, "rest_client_requests") { + if newMeetsActionExpected && comparison.Status != "pass" { + if oldMeets, ok := metricValues["action_total"]; ok { + oldTolerance := oldMeets.expected * 0.15 + oldMissed := math.Abs(oldMeets.old-oldMeets.expected) > oldTolerance + if oldMissed { + comparison.Status = "pass" + } + } + } + if newReconcileIsZero && comparison.Status != "pass" { + comparison.Status = "pass" + } + } + + if isChurnScenario { + if m.name == "errors_total" { + if v.new < 50 && v.old < 50 { + comparison.Status = "pass" + } else if v.new <= v.old*1.5 { + comparison.Status = "pass" + } + } + if m.name == "action_total" || m.name == "reload_executed_total" { + if v.old > 0 { + diff := math.Abs(v.new-v.old) / v.old * 100 + if diff <= 20 { + comparison.Status = "pass" + } + } else if v.new > 0 { + comparison.Status = "pass" + } + } + } + + report.Comparisons = append(report.Comparisons, comparison) + + if comparison.Status == "pass" { + report.PassCriteria = append(report.PassCriteria, m.name) + } else if comparison.Status == "fail" { + report.FailedCriteria = append(report.FailedCriteria, m.name) + } + } + + if len(report.FailedCriteria) == 0 { + report.OverallStatus = "PASS" + report.Summary = "All metrics within acceptable thresholds" + } else { + report.OverallStatus = "FAIL" + report.Summary = fmt.Sprintf("%d metrics failed: %s", + len(report.FailedCriteria), + strings.Join(report.FailedCriteria, ", ")) + } + + return report, nil +} + +func generateSingleVersionReport(report *ScenarioReport, dataDir, version, scenario string) (*ScenarioReport, error) { + metricsToCollect := []struct { + name string + file string + selector func(data PrometheusResponse) 
float64 + }{ + {"reconcile_total", "reloader_reconcile_total.json", sumAllValues}, + {"reconcile_duration_p50", "reconcile_p50.json", getFirstValue}, + {"reconcile_duration_p95", "reconcile_p95.json", getFirstValue}, + {"reconcile_duration_p99", "reconcile_p99.json", getFirstValue}, + {"action_total", "reloader_action_total.json", sumAllValues}, + {"action_latency_p50", "action_p50.json", getFirstValue}, + {"action_latency_p95", "action_p95.json", getFirstValue}, + {"action_latency_p99", "action_p99.json", getFirstValue}, + {"errors_total", "reloader_errors_total.json", sumAllValues}, + {"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues}, + {"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues}, + {"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues}, + {"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue}, + {"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue}, + {"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue}, + {"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue}, + {"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue}, + {"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB}, + {"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB}, + {"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB}, + {"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB}, + {"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue}, + {"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue}, + {"goroutines_avg", "goroutines_avg.json", getFirstValue}, + {"goroutines_max", "goroutines_max.json", getFirstValue}, + {"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs}, + } + + expectedValues := map[string]float64{ + "action_total": float64(report.Expected.ActionTotal), + "reload_executed_total": float64(report.Expected.ReloadExecutedTotal), + "reconcile_total": float64(report.Expected.ReconcileTotal), + "workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal), + "workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal), + "skipped_total": float64(report.Expected.SkippedTotal), + } + + for _, m := range metricsToCollect { + data, err := loadMetricFile(filepath.Join(dataDir, m.file)) + if err != nil { + log.Printf("Warning: Could not load metric %s: %v", m.name, err) + continue + } + + value := m.selector(data) + expected := expectedValues[m.name] + + info := metricInfoMap[m.name] + if info.unit == "" { + info = metricInfo{unit: "count", isCounter: true} + } + + displayName := m.name + if info.unit != "count" { + displayName = fmt.Sprintf("%s (%s)", m.name, info.unit) + } + + status := "info" + meetsExp := "-" + + if expected > 0 { + meetsExp = meetsExpected(value, expected) + threshold, ok := thresholds[m.name] + if ok && threshold.metricType == ShouldMatch { + if meetsExp == "✓" { + status = "pass" + report.PassCriteria = append(report.PassCriteria, m.name) + } else { + status = "fail" + report.FailedCriteria = append(report.FailedCriteria, m.name) + } + } + } + + if info.isCounter { + value = math.Round(value) + } + + report.Comparisons = append(report.Comparisons, MetricComparison{ + Name: m.name, + DisplayName: displayName, + Unit: info.unit, + IsCounter: info.isCounter, + OldValue: 0, + NewValue: value, + Expected: expected, + OldMeetsExpected: "-", + NewMeetsExpected: meetsExp, + Status: status, + }) + } + + 
if len(report.FailedCriteria) == 0 { + report.OverallStatus = "PASS" + report.Summary = fmt.Sprintf("Single-version test (%s) completed successfully", version) + } else { + report.OverallStatus = "FAIL" + report.Summary = fmt.Sprintf("%d metrics failed: %s", + len(report.FailedCriteria), + strings.Join(report.FailedCriteria, ", ")) + } + + return report, nil +} + +func loadMetricFile(path string) (PrometheusResponse, error) { + var resp PrometheusResponse + data, err := os.ReadFile(path) + if err != nil { + return resp, err + } + err = json.Unmarshal(data, &resp) + return resp, err +} + +func sumAllValues(data PrometheusResponse) float64 { + var sum float64 + for _, result := range data.Data.Result { + if len(result.Value) >= 2 { + if v, ok := result.Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + sum += f + } + } + } + return sum +} + +func sumSuccessValues(data PrometheusResponse) float64 { + var sum float64 + for _, result := range data.Data.Result { + if result.Metric["success"] == "true" { + if len(result.Value) >= 2 { + if v, ok := result.Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + sum += f + } + } + } + } + return sum +} + +func getFirstValue(data PrometheusResponse) float64 { + if len(data.Data.Result) > 0 && len(data.Data.Result[0].Value) >= 2 { + if v, ok := data.Data.Result[0].Value[1].(string); ok { + var f float64 + fmt.Sscanf(v, "%f", &f) + return f + } + } + return 0 +} + +func bytesToMB(data PrometheusResponse) float64 { + bytes := getFirstValue(data) + return bytes / (1024 * 1024) +} + +func secondsToMs(data PrometheusResponse) float64 { + seconds := getFirstValue(data) + return seconds * 1000 +} + +func meetsExpected(value, expected float64) string { + if expected == 0 { + return "-" + } + tolerance := expected * 0.15 + if math.Abs(value-expected) <= tolerance { + return "✓" + } + return "✗" +} + +func compareMetricWithExpected(name string, oldValue, newValue, expected float64) MetricComparison { + diff := newValue - oldValue + absDiff := math.Abs(diff) + var diffPct float64 + if oldValue != 0 { + diffPct = (diff / oldValue) * 100 + } else if newValue != 0 { + diffPct = 100 + } + + threshold, ok := thresholds[name] + if !ok { + threshold = thresholdConfig{maxDiff: 10.0, metricType: ShouldMatch} + } + + info := metricInfoMap[name] + if info.unit == "" { + info = metricInfo{unit: "count", isCounter: true} + } + displayName := name + if info.unit != "count" { + displayName = fmt.Sprintf("%s (%s)", name, info.unit) + } + + if info.isCounter { + oldValue = math.Round(oldValue) + newValue = math.Round(newValue) + } + + status := "pass" + oldMeetsExp := meetsExpected(oldValue, expected) + newMeetsExp := meetsExpected(newValue, expected) + + isNewMetric := info.isCounter && oldValue == 0 && newValue > 0 && expected == 0 + + if isNewMetric { + status = "info" + } else if expected > 0 && threshold.metricType == ShouldMatch { + if newMeetsExp == "✗" { + status = "fail" + } + } else { + switch threshold.metricType { + case LowerIsBetter: + if threshold.minAbsDiff > 0 && absDiff < threshold.minAbsDiff { + status = "pass" + } else if diffPct > threshold.maxDiff { + status = "fail" + } + case HigherIsBetter: + if diffPct < -threshold.maxDiff { + status = "fail" + } + case ShouldMatch: + if math.Abs(diffPct) > threshold.maxDiff { + status = "fail" + } + case Informational: + status = "info" + } + } + + return MetricComparison{ + Name: name, + DisplayName: displayName, + Unit: info.unit, + IsCounter: info.isCounter, + Expected: expected, + 
OldMeetsExpected: oldMeetsExp, + NewMeetsExpected: newMeetsExp, + OldValue: oldValue, + NewValue: newValue, + Difference: diff, + DiffPct: diffPct, + Status: status, + Threshold: threshold.maxDiff, + } +} + +func renderScenarioReport(report *ScenarioReport) string { + var sb strings.Builder + + isSingleVersion := true + for _, c := range report.Comparisons { + if c.OldValue != 0 { + isSingleVersion = false + break + } + } + + sb.WriteString("\n") + sb.WriteString("================================================================================\n") + if isSingleVersion { + sb.WriteString(" RELOADER TEST REPORT\n") + } else { + sb.WriteString(" RELOADER A/B COMPARISON REPORT\n") + } + sb.WriteString("================================================================================\n\n") + + fmt.Fprintf(&sb, "Scenario: %s\n", report.Scenario) + fmt.Fprintf(&sb, "Generated: %s\n", report.Timestamp.Format("2006-01-02 15:04:05")) + fmt.Fprintf(&sb, "Status: %s\n", report.OverallStatus) + fmt.Fprintf(&sb, "Summary: %s\n", report.Summary) + + if report.TestDescription != "" { + fmt.Fprintf(&sb, "Test: %s\n", report.TestDescription) + } + + if report.Expected.ActionTotal > 0 { + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" EXPECTED VALUES\n") + sb.WriteString("--------------------------------------------------------------------------------\n") + fmt.Fprintf(&sb, "Expected Action Total: %d\n", report.Expected.ActionTotal) + fmt.Fprintf(&sb, "Expected Reload Executed Total: %d\n", report.Expected.ReloadExecutedTotal) + if report.Expected.SkippedTotal > 0 { + fmt.Fprintf(&sb, "Expected Skipped Total: %d\n", report.Expected.SkippedTotal) + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + if isSingleVersion { + sb.WriteString(" METRICS\n") + } else { + sb.WriteString(" METRIC COMPARISONS\n") + } + sb.WriteString("--------------------------------------------------------------------------------\n") + + if isSingleVersion { + sb.WriteString("(✓ = meets expected value within 15%)\n\n") + fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n", + "Metric", "Value", "Expected", "Met?", "Status") + fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n", + "------", "-----", "--------", "----", "------") + + for _, c := range report.Comparisons { + if c.IsCounter { + if c.Expected > 0 { + fmt.Fprintf(&sb, "%-32s %12.0f %10.0f %5s %8s\n", + c.DisplayName, c.NewValue, c.Expected, + c.NewMeetsExpected, c.Status) + } else { + fmt.Fprintf(&sb, "%-32s %12.0f %10s %5s %8s\n", + c.DisplayName, c.NewValue, "-", + c.NewMeetsExpected, c.Status) + } + } else { + fmt.Fprintf(&sb, "%-32s %12.4f %10s %5s %8s\n", + c.DisplayName, c.NewValue, "-", + c.NewMeetsExpected, c.Status) + } + } + } else { + sb.WriteString("(Old✓/New✓ = meets expected value within 15%)\n\n") + + fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n", + "Metric", "Old", "New", "Expected", "Old✓", "New✓", "Status") + fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n", + "------", "---", "---", "--------", "----", "----", "------") + + for _, c := range report.Comparisons { + if c.IsCounter { + if c.Expected > 0 { + fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10.0f %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, c.Expected, + c.OldMeetsExpected, c.NewMeetsExpected, c.Status) + } else { + fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10s %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, "-", + c.OldMeetsExpected, c.NewMeetsExpected, 
c.Status) + } + } else { + fmt.Fprintf(&sb, "%-32s %12.4f %12.4f %10s %5s %5s %8s\n", + c.DisplayName, c.OldValue, c.NewValue, "-", + c.OldMeetsExpected, c.NewMeetsExpected, c.Status) + } + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" PASS/FAIL CRITERIA\n") + sb.WriteString("--------------------------------------------------------------------------------\n\n") + + fmt.Fprintf(&sb, "Passed (%d):\n", len(report.PassCriteria)) + for _, p := range report.PassCriteria { + fmt.Fprintf(&sb, " ✓ %s\n", p) + } + + if len(report.FailedCriteria) > 0 { + fmt.Fprintf(&sb, "\nFailed (%d):\n", len(report.FailedCriteria)) + for _, f := range report.FailedCriteria { + fmt.Fprintf(&sb, " ✗ %s\n", f) + } + } + + sb.WriteString("\n--------------------------------------------------------------------------------\n") + sb.WriteString(" THRESHOLDS USED\n") + sb.WriteString("--------------------------------------------------------------------------------\n\n") + + fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n", + "Metric", "Max Diff%", "Min Abs Diff", "Direction") + fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n", + "------", "---------", "------------", "---------") + + var names []string + for name := range thresholds { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + t := thresholds[name] + var direction string + switch t.metricType { + case LowerIsBetter: + direction = "lower is better" + case HigherIsBetter: + direction = "higher is better" + case ShouldMatch: + direction = "should match" + case Informational: + direction = "info only" + } + minAbsDiff := "-" + if t.minAbsDiff > 0 { + minAbsDiff = fmt.Sprintf("%.1f", t.minAbsDiff) + } + fmt.Fprintf(&sb, "%-35s %9.1f%% %15s %18s\n", + name, t.maxDiff, minAbsDiff, direction) + } + + sb.WriteString("\n================================================================================\n") + + return sb.String() +} + +func renderScenarioReportJSON(report *ScenarioReport) string { + data, err := json.MarshalIndent(report, "", " ") + if err != nil { + return fmt.Sprintf(`{"error": "%s"}`, err.Error()) + } + return string(data) +} + +func renderScenarioReportMarkdown(report *ScenarioReport) string { + var sb strings.Builder + + emoji := "✅" + if report.OverallStatus != "PASS" { + emoji = "❌" + } + + sb.WriteString(fmt.Sprintf("## %s %s: %s\n\n", emoji, report.Scenario, report.OverallStatus)) + + if report.TestDescription != "" { + sb.WriteString(fmt.Sprintf("> %s\n\n", report.TestDescription)) + } + + sb.WriteString("| Metric | Value | Expected | Status |\n") + sb.WriteString("|--------|------:|:--------:|:------:|\n") + + keyMetrics := []string{"action_total", "reload_executed_total", "errors_total", "reconcile_total"} + for _, name := range keyMetrics { + for _, c := range report.Comparisons { + if c.Name == name { + value := fmt.Sprintf("%.0f", c.NewValue) + expected := "-" + if c.Expected > 0 { + expected = fmt.Sprintf("%.0f", c.Expected) + } + status := "✅" + if c.Status == "fail" { + status = "❌" + } else if c.Status == "info" { + status = "ℹ️" + } + sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s |\n", c.DisplayName, value, expected, status)) + break + } + } + } + + return sb.String() +} diff --git a/test/loadtest/internal/cmd/root.go b/test/loadtest/internal/cmd/root.go new file mode 100644 index 0000000..46e9be5 --- /dev/null +++ b/test/loadtest/internal/cmd/root.go @@ -0,0 +1,43 @@ +package cmd + +import ( + "os" + + "github.com/spf13/cobra" +) 
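+// The CLI wires three subcommands: "run" executes scenarios against kind
+// clusters, "report" renders a single scenario's results, and "summary"
+// aggregates all scenarios for CI.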
+ +const ( + // DefaultClusterName is the default kind cluster name. + DefaultClusterName = "reloader-loadtest" + // TestNamespace is the namespace used for test resources. + TestNamespace = "reloader-test" +) + +// OutputFormat defines the output format for reports. +type OutputFormat string + +const ( + OutputFormatText OutputFormat = "text" + OutputFormatJSON OutputFormat = "json" + OutputFormatMarkdown OutputFormat = "markdown" +) + +// rootCmd is the base command. +var rootCmd = &cobra.Command{ + Use: "loadtest", + Short: "Reloader Load Test CLI", + Long: `A CLI tool for running A/B comparison load tests on Reloader.`, +} + +func init() { + rootCmd.AddCommand(runCmd) + rootCmd.AddCommand(reportCmd) + rootCmd.AddCommand(summaryCmd) +} + +// Execute runs the root command. +func Execute() { + if err := rootCmd.Execute(); err != nil { + os.Exit(1) + } +} diff --git a/test/loadtest/internal/cmd/run.go b/test/loadtest/internal/cmd/run.go new file mode 100644 index 0000000..c78e579 --- /dev/null +++ b/test/loadtest/internal/cmd/run.go @@ -0,0 +1,648 @@ +package cmd + +import ( + "context" + "fmt" + "log" + "os" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "sync" + "syscall" + "time" + + "github.com/spf13/cobra" + "github.com/stakater/Reloader/test/loadtest/internal/cluster" + "github.com/stakater/Reloader/test/loadtest/internal/prometheus" + "github.com/stakater/Reloader/test/loadtest/internal/reloader" + "github.com/stakater/Reloader/test/loadtest/internal/scenarios" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +// RunConfig holds CLI configuration for the run command. +type RunConfig struct { + OldImage string + NewImage string + Scenario string + Duration int + SkipCluster bool + SkipImageLoad bool + ClusterName string + ResultsDir string + ManifestsDir string + Parallelism int +} + +// workerContext holds all resources for a single worker (cluster + prometheus). +type workerContext struct { + id int + clusterMgr *cluster.Manager + promMgr *prometheus.Manager + kubeClient kubernetes.Interface + kubeContext string + runtime string +} + +var runCfg RunConfig + +var runCmd = &cobra.Command{ + Use: "run", + Short: "Run A/B comparison tests", + Long: `Run load tests comparing old and new versions of Reloader. 
+
+Examples:
+  # Compare two images
+  loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=stakater/reloader:v1.1.0
+
+  # Run specific scenario
+  loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=localhost/reloader:dev --scenario=S2
+
+  # Test single image (no comparison)
+  loadtest run --new-image=localhost/reloader:test
+
+  # Run all scenarios in parallel on 4 clusters
+  loadtest run --new-image=localhost/reloader:test --parallelism=4`,
+	Run: func(cmd *cobra.Command, args []string) {
+		runCommand()
+	},
+}
+
+func init() {
+	runCmd.Flags().StringVar(&runCfg.OldImage, "old-image", "", "Container image for \"old\" version (required for comparison)")
+	runCmd.Flags().StringVar(&runCfg.NewImage, "new-image", "", "Container image for \"new\" version (the only image needed for a single-version run)")
+	runCmd.Flags().StringVar(&runCfg.Scenario, "scenario", "all", "Test scenarios: a single ID (S1-S13), a comma-separated list (e.g. S1,S4,S6), or \"all\"")
+	runCmd.Flags().IntVar(&runCfg.Duration, "duration", 60, "Test duration in seconds")
+	runCmd.Flags().IntVar(&runCfg.Parallelism, "parallelism", 1, "Run N scenarios in parallel on N clusters")
+	runCmd.Flags().BoolVar(&runCfg.SkipCluster, "skip-cluster", false, "Skip kind cluster creation (use existing)")
+	runCmd.Flags().BoolVar(&runCfg.SkipImageLoad, "skip-image-load", false, "Skip loading images into kind (use when images are already loaded)")
+	runCmd.Flags().StringVar(&runCfg.ClusterName, "cluster-name", DefaultClusterName, "Kind cluster name")
+	runCmd.Flags().StringVar(&runCfg.ResultsDir, "results-dir", "./results", "Directory for results")
+	runCmd.Flags().StringVar(&runCfg.ManifestsDir, "manifests-dir", "", "Directory containing manifests (auto-detected if not set)")
+}
+
+func runCommand() {
+	if runCfg.ManifestsDir == "" {
+		execPath, _ := os.Executable()
+		execDir := filepath.Dir(execPath)
+		runCfg.ManifestsDir = filepath.Join(execDir, "..", "..", "manifests")
+		if _, err := os.Stat(runCfg.ManifestsDir); os.IsNotExist(err) {
+			runCfg.ManifestsDir = "./manifests"
+		}
+	}
+
+	if runCfg.Parallelism < 1 {
+		runCfg.Parallelism = 1
+	}
+
+	if runCfg.OldImage == "" && runCfg.NewImage == "" {
+		log.Fatal("At least one of --old-image or --new-image is required")
+	}
+
+	runOld := runCfg.OldImage != ""
+	runNew := runCfg.NewImage != ""
+	runBoth := runOld && runNew
+
+	log.Printf("Configuration:")
+	log.Printf("  Scenario: %s", runCfg.Scenario)
+	log.Printf("  Duration: %ds", runCfg.Duration)
+	log.Printf("  Parallelism: %d", runCfg.Parallelism)
+	if runCfg.OldImage != "" {
+		log.Printf("  Old image: %s", runCfg.OldImage)
+	}
+	if runCfg.NewImage != "" {
+		log.Printf("  New image: %s", runCfg.NewImage)
+	}
+
+	runtime, err := cluster.DetectContainerRuntime()
+	if err != nil {
+		log.Fatalf("Failed to detect container runtime: %v", err)
+	}
+	log.Printf("  Container runtime: %s", runtime)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+	go func() {
+		<-sigCh
+		log.Println("Received shutdown signal...")
+		cancel()
+	}()
+
+	var scenariosToRun []string
+	if runCfg.Scenario == "all" {
+		scenariosToRun = []string{"S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S12", "S13"}
+	} else {
+		// Split comma-separated scenarios (e.g., "S1,S4,S6")
+		for _, s := range strings.Split(runCfg.Scenario, ",") {
+			if trimmed := strings.TrimSpace(s); trimmed != "" {
+				scenariosToRun = append(scenariosToRun, trimmed)
+			}
+		}
+	}
+
+	if runCfg.SkipCluster && runCfg.Parallelism > 1 {
+		
log.Fatal("--skip-cluster is not supported with --parallelism > 1") + } + + if runCfg.Parallelism > 1 { + runParallel(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth) + return + } + + runSequential(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth) +} + +func runSequential(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) { + clusterMgr := cluster.NewManager(cluster.Config{ + Name: cfg.ClusterName, + ContainerRuntime: runtime, + }) + + if cfg.SkipCluster { + log.Printf("Skipping cluster creation (using existing cluster: %s)", cfg.ClusterName) + if !clusterMgr.Exists() { + log.Fatalf("Cluster %s does not exist. Remove --skip-cluster to create it.", cfg.ClusterName) + } + } else { + log.Println("Creating kind cluster...") + if err := clusterMgr.Create(ctx); err != nil { + log.Fatalf("Failed to create cluster: %v", err) + } + } + + promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml") + promMgr := prometheus.NewManager(promManifest) + + log.Println("Installing Prometheus...") + if err := promMgr.Deploy(ctx); err != nil { + log.Fatalf("Failed to deploy Prometheus: %v", err) + } + + if err := promMgr.StartPortForward(ctx); err != nil { + log.Fatalf("Failed to start Prometheus port-forward: %v", err) + } + defer promMgr.StopPortForward() + + if cfg.SkipImageLoad { + log.Println("Skipping image loading (--skip-image-load)") + } else { + log.Println("Loading images into kind cluster...") + if runOld { + log.Printf("Loading old image: %s", cfg.OldImage) + if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil { + log.Fatalf("Failed to load old image: %v", err) + } + } + if runNew { + log.Printf("Loading new image: %s", cfg.NewImage) + if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil { + log.Fatalf("Failed to load new image: %v", err) + } + } + + log.Println("Pre-loading test images...") + testImage := "gcr.io/google-containers/busybox:1.27" + clusterMgr.LoadImage(ctx, testImage) + } + + kubeClient, err := getKubeClient("") + if err != nil { + log.Fatalf("Failed to create kubernetes client: %v", err) + } + + for _, scenarioID := range scenariosToRun { + log.Printf("========================================") + log.Printf("=== Starting scenario %s ===", scenarioID) + log.Printf("========================================") + + cleanupTestNamespaces(ctx, "") + reloader.CleanupByVersion(ctx, "old", "") + reloader.CleanupByVersion(ctx, "new", "") + + if err := promMgr.Reset(ctx); err != nil { + log.Printf("Warning: failed to reset Prometheus: %v", err) + } + + createTestNamespace(ctx, "") + + if runOld { + oldMgr := reloader.NewManager(reloader.Config{ + Version: "old", + Image: cfg.OldImage, + }) + + if err := oldMgr.Deploy(ctx); err != nil { + log.Printf("Failed to deploy old Reloader: %v", err) + continue + } + + if err := promMgr.WaitForTarget(ctx, oldMgr.Job(), 60*time.Second); err != nil { + log.Printf("Warning: %v", err) + log.Println("Proceeding anyway, but metrics may be incomplete") + } + + runScenario(ctx, kubeClient, scenarioID, "old", cfg.OldImage, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, promMgr, oldMgr.Job(), scenarioID, "old", cfg.ResultsDir) + collectLogs(ctx, oldMgr, scenarioID, "old", cfg.ResultsDir) + + if runBoth { + cleanupTestNamespaces(ctx, "") + oldMgr.Cleanup(ctx) + promMgr.Reset(ctx) + createTestNamespace(ctx, "") + } + } + + if runNew { + newMgr := reloader.NewManager(reloader.Config{ + Version: "new", + Image: cfg.NewImage, + }) + + if err := 
newMgr.Deploy(ctx); err != nil { + log.Printf("Failed to deploy new Reloader: %v", err) + continue + } + + if err := promMgr.WaitForTarget(ctx, newMgr.Job(), 60*time.Second); err != nil { + log.Printf("Warning: %v", err) + log.Println("Proceeding anyway, but metrics may be incomplete") + } + + runScenario(ctx, kubeClient, scenarioID, "new", cfg.NewImage, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, promMgr, newMgr.Job(), scenarioID, "new", cfg.ResultsDir) + collectLogs(ctx, newMgr, scenarioID, "new", cfg.ResultsDir) + } + + generateReport(scenarioID, cfg.ResultsDir, runBoth) + log.Printf("=== Scenario %s complete ===", scenarioID) + } + + log.Println("Load test complete!") + log.Printf("Results available in: %s", cfg.ResultsDir) +} + +func runParallel(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) { + numWorkers := cfg.Parallelism + if numWorkers > len(scenariosToRun) { + numWorkers = len(scenariosToRun) + log.Printf("Reducing parallelism to %d (number of scenarios)", numWorkers) + } + + log.Printf("Starting parallel execution with %d workers", numWorkers) + + workers := make([]*workerContext, numWorkers) + var setupWg sync.WaitGroup + setupErrors := make(chan error, numWorkers) + + log.Println("Setting up worker clusters...") + for i := range numWorkers { + setupWg.Add(1) + go func(workerID int) { + defer setupWg.Done() + worker, err := setupWorker(ctx, cfg, workerID, runtime, runOld, runNew) + if err != nil { + setupErrors <- fmt.Errorf("worker %d setup failed: %w", workerID, err) + return + } + workers[workerID] = worker + }(i) + } + + setupWg.Wait() + close(setupErrors) + + for err := range setupErrors { + log.Printf("Error: %v", err) + } + + readyWorkers := 0 + for _, w := range workers { + if w != nil { + readyWorkers++ + } + } + if readyWorkers == 0 { + log.Fatal("No workers ready, aborting") + } + if readyWorkers < numWorkers { + log.Printf("Warning: only %d/%d workers ready", readyWorkers, numWorkers) + } + + defer func() { + log.Println("Cleaning up worker clusters...") + for _, w := range workers { + if w != nil { + w.promMgr.StopPortForward() + } + } + }() + + scenarioCh := make(chan string, len(scenariosToRun)) + for _, s := range scenariosToRun { + scenarioCh <- s + } + close(scenarioCh) + + var resultsMu sync.Mutex + completedScenarios := make([]string, 0, len(scenariosToRun)) + + var wg sync.WaitGroup + for _, worker := range workers { + if worker == nil { + continue + } + wg.Add(1) + go func(w *workerContext) { + defer wg.Done() + for scenarioID := range scenarioCh { + select { + case <-ctx.Done(): + return + default: + } + + log.Printf("[Worker %d] Starting scenario %s", w.id, scenarioID) + + cleanupTestNamespaces(ctx, w.kubeContext) + reloader.CleanupByVersion(ctx, "old", w.kubeContext) + reloader.CleanupByVersion(ctx, "new", w.kubeContext) + + if err := w.promMgr.Reset(ctx); err != nil { + log.Printf("[Worker %d] Warning: failed to reset Prometheus: %v", w.id, err) + } + + createTestNamespace(ctx, w.kubeContext) + + if runOld { + runVersionOnWorker(ctx, w, cfg, scenarioID, "old", cfg.OldImage, runBoth) + } + + if runNew { + runVersionOnWorker(ctx, w, cfg, scenarioID, "new", cfg.NewImage, false) + } + + generateReport(scenarioID, cfg.ResultsDir, runBoth) + + resultsMu.Lock() + completedScenarios = append(completedScenarios, scenarioID) + resultsMu.Unlock() + + log.Printf("[Worker %d] Scenario %s complete", w.id, scenarioID) + } + }(worker) + } + + wg.Wait() + + log.Println("Load test complete!") + 
log.Printf("Completed %d/%d scenarios", len(completedScenarios), len(scenariosToRun)) + log.Printf("Results available in: %s", cfg.ResultsDir) +} + +func setupWorker(ctx context.Context, cfg RunConfig, workerID int, runtime string, runOld, runNew bool) (*workerContext, error) { + workerName := fmt.Sprintf("%s-%d", DefaultClusterName, workerID) + promPort := 9091 + workerID + + log.Printf("[Worker %d] Creating cluster %s (ports %d/%d)...", workerID, workerName, 8080+workerID, 8443+workerID) + + clusterMgr := cluster.NewManager(cluster.Config{ + Name: workerName, + ContainerRuntime: runtime, + PortOffset: workerID, + }) + + if err := clusterMgr.Create(ctx); err != nil { + return nil, fmt.Errorf("creating cluster: %w", err) + } + + kubeContext := clusterMgr.Context() + + promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml") + promMgr := prometheus.NewManagerWithPort(promManifest, promPort, kubeContext) + + log.Printf("[Worker %d] Installing Prometheus (port %d)...", workerID, promPort) + if err := promMgr.Deploy(ctx); err != nil { + return nil, fmt.Errorf("deploying prometheus: %w", err) + } + + if err := promMgr.StartPortForward(ctx); err != nil { + return nil, fmt.Errorf("starting prometheus port-forward: %w", err) + } + + if cfg.SkipImageLoad { + log.Printf("[Worker %d] Skipping image loading (--skip-image-load)", workerID) + } else { + log.Printf("[Worker %d] Loading images...", workerID) + if runOld { + if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil { + log.Printf("[Worker %d] Warning: failed to load old image: %v", workerID, err) + } + } + if runNew { + if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil { + log.Printf("[Worker %d] Warning: failed to load new image: %v", workerID, err) + } + } + + testImage := "gcr.io/google-containers/busybox:1.27" + clusterMgr.LoadImage(ctx, testImage) + } + + kubeClient, err := getKubeClient(kubeContext) + if err != nil { + return nil, fmt.Errorf("creating kubernetes client: %w", err) + } + + log.Printf("[Worker %d] Ready", workerID) + return &workerContext{ + id: workerID, + clusterMgr: clusterMgr, + promMgr: promMgr, + kubeClient: kubeClient, + kubeContext: kubeContext, + runtime: runtime, + }, nil +} + +func runVersionOnWorker(ctx context.Context, w *workerContext, cfg RunConfig, scenarioID, version, image string, cleanupAfter bool) { + mgr := reloader.NewManager(reloader.Config{ + Version: version, + Image: image, + }) + mgr.SetKubeContext(w.kubeContext) + + if err := mgr.Deploy(ctx); err != nil { + log.Printf("[Worker %d] Failed to deploy %s Reloader: %v", w.id, version, err) + return + } + + if err := w.promMgr.WaitForTarget(ctx, mgr.Job(), 60*time.Second); err != nil { + log.Printf("[Worker %d] Warning: %v", w.id, err) + log.Printf("[Worker %d] Proceeding anyway, but metrics may be incomplete", w.id) + } + + runScenario(ctx, w.kubeClient, scenarioID, version, image, cfg.Duration, cfg.ResultsDir) + collectMetrics(ctx, w.promMgr, mgr.Job(), scenarioID, version, cfg.ResultsDir) + collectLogs(ctx, mgr, scenarioID, version, cfg.ResultsDir) + + if cleanupAfter { + cleanupTestNamespaces(ctx, w.kubeContext) + mgr.Cleanup(ctx) + w.promMgr.Reset(ctx) + createTestNamespace(ctx, w.kubeContext) + } +} + +func runScenario(ctx context.Context, client kubernetes.Interface, scenarioID, version, image string, duration int, resultsDir string) { + runner, ok := scenarios.Registry[scenarioID] + if !ok { + log.Printf("Unknown scenario: %s", scenarioID) + return + } + + if s6, ok := runner.(*scenarios.ControllerRestartScenario); 
ok { + s6.ReloaderVersion = version + } + + if s11, ok := runner.(*scenarios.AnnotationStrategyScenario); ok { + s11.Image = image + } + + log.Printf("Running scenario %s (%s): %s", scenarioID, version, runner.Description()) + + if ctx.Err() != nil { + log.Printf("WARNING: Parent context already done: %v", ctx.Err()) + } + + timeout := time.Duration(duration)*time.Second + 5*time.Minute + log.Printf("Creating scenario context with timeout: %v (duration=%ds)", timeout, duration) + + scenarioCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + expected, err := runner.Run(scenarioCtx, client, TestNamespace, time.Duration(duration)*time.Second) + if err != nil { + log.Printf("Scenario %s failed: %v", scenarioID, err) + } + + scenarios.WriteExpectedMetrics(scenarioID, resultsDir, expected) +} + +func collectMetrics(ctx context.Context, promMgr *prometheus.Manager, job, scenarioID, version, resultsDir string) { + log.Printf("Waiting 5s for Reloader to finish processing events...") + time.Sleep(5 * time.Second) + + log.Printf("Waiting 8s for Prometheus to scrape final metrics...") + time.Sleep(8 * time.Second) + + log.Printf("Collecting metrics for %s...", version) + outputDir := filepath.Join(resultsDir, scenarioID, version) + if err := promMgr.CollectMetrics(ctx, job, outputDir, scenarioID); err != nil { + log.Printf("Failed to collect metrics: %v", err) + } +} + +func collectLogs(ctx context.Context, mgr *reloader.Manager, scenarioID, version, resultsDir string) { + log.Printf("Collecting logs for %s...", version) + logPath := filepath.Join(resultsDir, scenarioID, version, "reloader.log") + if err := mgr.CollectLogs(ctx, logPath); err != nil { + log.Printf("Failed to collect logs: %v", err) + } +} + +func generateReport(scenarioID, resultsDir string, isComparison bool) { + if isComparison { + log.Println("Generating comparison report...") + } else { + log.Println("Generating single-version report...") + } + + reportPath := filepath.Join(resultsDir, scenarioID, "report.txt") + + cmd := exec.Command(os.Args[0], "report", + fmt.Sprintf("--scenario=%s", scenarioID), + fmt.Sprintf("--results-dir=%s", resultsDir), + fmt.Sprintf("--output=%s", reportPath)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Run() + + if data, err := os.ReadFile(reportPath); err == nil { + fmt.Println(string(data)) + } + + log.Printf("Report saved to: %s", reportPath) +} + +func getKubeClient(kubeContext string) (kubernetes.Interface, error) { + kubeconfig := os.Getenv("KUBECONFIG") + if kubeconfig == "" { + home, _ := os.UserHomeDir() + kubeconfig = filepath.Join(home, ".kube", "config") + } + + loadingRules := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig} + configOverrides := &clientcmd.ConfigOverrides{} + if kubeContext != "" { + configOverrides.CurrentContext = kubeContext + } + + kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + config, err := kubeConfig.ClientConfig() + if err != nil { + return nil, err + } + + return kubernetes.NewForConfig(config) +} + +func createTestNamespace(ctx context.Context, kubeContext string) { + args := []string{"create", "namespace", TestNamespace, "--dry-run=client", "-o", "yaml"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + cmd := exec.CommandContext(ctx, "kubectl", args...) + out, _ := cmd.Output() + + applyArgs := []string{"apply", "-f", "-"} + if kubeContext != "" { + applyArgs = append([]string{"--context", kubeContext}, applyArgs...) 
+ } + applyCmd := exec.CommandContext(ctx, "kubectl", applyArgs...) + applyCmd.Stdin = strings.NewReader(string(out)) + applyCmd.Run() +} + +func cleanupTestNamespaces(ctx context.Context, kubeContext string) { + log.Println("Cleaning up test resources...") + + namespaces := []string{TestNamespace} + for i := range 10 { + namespaces = append(namespaces, fmt.Sprintf("%s-%d", TestNamespace, i)) + } + + for _, ns := range namespaces { + args := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + exec.CommandContext(ctx, "kubectl", args...).Run() + } + + time.Sleep(2 * time.Second) + + for _, ns := range namespaces { + args := []string{"delete", "pods", "--all", "-n", ns, "--grace-period=0", "--force"} + if kubeContext != "" { + args = append([]string{"--context", kubeContext}, args...) + } + exec.CommandContext(ctx, "kubectl", args...).Run() + } +} + diff --git a/test/loadtest/internal/cmd/summary.go b/test/loadtest/internal/cmd/summary.go new file mode 100644 index 0000000..bda40fb --- /dev/null +++ b/test/loadtest/internal/cmd/summary.go @@ -0,0 +1,251 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "log" + "os" + "sort" + "strings" + "time" + + "github.com/spf13/cobra" +) + +var ( + summaryResultsDir string + summaryOutputFile string + summaryFormat string + summaryTestType string +) + +var summaryCmd = &cobra.Command{ + Use: "summary", + Short: "Generate summary across all scenarios (for CI)", + Long: `Generate an aggregated summary report across all test scenarios. + +Examples: + # Generate markdown summary for CI + loadtest summary --results-dir=./results --format=markdown`, + Run: func(cmd *cobra.Command, args []string) { + summaryCommand() + }, +} + +func init() { + summaryCmd.Flags().StringVar(&summaryResultsDir, "results-dir", "./results", "Directory containing results") + summaryCmd.Flags().StringVar(&summaryOutputFile, "output", "", "Output file (default: stdout)") + summaryCmd.Flags().StringVar(&summaryFormat, "format", "markdown", "Output format: text, json, markdown") + summaryCmd.Flags().StringVar(&summaryTestType, "test-type", "full", "Test type label: quick, full") +} + +// SummaryReport aggregates results from multiple scenarios. +type SummaryReport struct { + Timestamp time.Time `json:"timestamp"` + TestType string `json:"test_type"` + PassCount int `json:"pass_count"` + FailCount int `json:"fail_count"` + TotalCount int `json:"total_count"` + Scenarios []ScenarioSummary `json:"scenarios"` +} + +// ScenarioSummary provides a brief summary of a single scenario. 
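+// A sketch of the JSON it marshals to, with illustrative values:
+//
+//	{"id": "S1", "status": "PASS", "description": "Burst ConfigMap/Secret updates",
+//	 "action_total": 120, "action_expected": 120, "errors_total": 0}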
+type ScenarioSummary struct { + ID string `json:"id"` + Status string `json:"status"` + Description string `json:"description"` + ActionTotal float64 `json:"action_total"` + ActionExp float64 `json:"action_expected"` + ErrorsTotal float64 `json:"errors_total"` +} + +func summaryCommand() { + summary, err := generateSummaryReport(summaryResultsDir, summaryTestType) + if err != nil { + log.Fatalf("Failed to generate summary: %v", err) + } + + var output string + switch OutputFormat(summaryFormat) { + case OutputFormatJSON: + output = renderSummaryJSON(summary) + case OutputFormatText: + output = renderSummaryText(summary) + default: + output = renderSummaryMarkdown(summary) + } + + if summaryOutputFile != "" { + if err := os.WriteFile(summaryOutputFile, []byte(output), 0644); err != nil { + log.Fatalf("Failed to write output file: %v", err) + } + log.Printf("Summary written to %s", summaryOutputFile) + } else { + fmt.Print(output) + } + + if summary.FailCount > 0 { + os.Exit(1) + } +} + +func generateSummaryReport(resultsDir, testType string) (*SummaryReport, error) { + summary := &SummaryReport{ + Timestamp: time.Now(), + TestType: testType, + } + + entries, err := os.ReadDir(resultsDir) + if err != nil { + return nil, fmt.Errorf("failed to read results directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() || !strings.HasPrefix(entry.Name(), "S") { + continue + } + + scenarioID := entry.Name() + report, err := generateScenarioReport(scenarioID, resultsDir) + if err != nil { + log.Printf("Warning: failed to load scenario %s: %v", scenarioID, err) + continue + } + + scenarioSummary := ScenarioSummary{ + ID: scenarioID, + Status: report.OverallStatus, + Description: report.TestDescription, + } + + for _, c := range report.Comparisons { + switch c.Name { + case "action_total": + scenarioSummary.ActionTotal = c.NewValue + scenarioSummary.ActionExp = c.Expected + case "errors_total": + scenarioSummary.ErrorsTotal = c.NewValue + } + } + + summary.Scenarios = append(summary.Scenarios, scenarioSummary) + summary.TotalCount++ + if report.OverallStatus == "PASS" { + summary.PassCount++ + } else { + summary.FailCount++ + } + } + + sort.Slice(summary.Scenarios, func(i, j int) bool { + return naturalSort(summary.Scenarios[i].ID, summary.Scenarios[j].ID) + }) + + return summary, nil +} + +func naturalSort(a, b string) bool { + var aNum, bNum int + fmt.Sscanf(a, "S%d", &aNum) + fmt.Sscanf(b, "S%d", &bNum) + return aNum < bNum +} + +func renderSummaryJSON(summary *SummaryReport) string { + data, err := json.MarshalIndent(summary, "", " ") + if err != nil { + return fmt.Sprintf(`{"error": "%s"}`, err.Error()) + } + return string(data) +} + +func renderSummaryText(summary *SummaryReport) string { + var sb strings.Builder + + sb.WriteString("================================================================================\n") + sb.WriteString(" LOAD TEST SUMMARY\n") + sb.WriteString("================================================================================\n\n") + + passRate := 0 + if summary.TotalCount > 0 { + passRate = summary.PassCount * 100 / summary.TotalCount + } + + fmt.Fprintf(&sb, "Test Type: %s\n", summary.TestType) + fmt.Fprintf(&sb, "Results: %d/%d passed (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate) + + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "ID", "Status", "Description", "Actions", "Errors") + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "------", "--------", strings.Repeat("-", 45), "----------", "--------") + + for _, s := range 
summary.Scenarios { + desc := s.Description + if len(desc) > 45 { + desc = desc[:42] + "..." + } + actions := fmt.Sprintf("%.0f", s.ActionTotal) + if s.ActionExp > 0 { + actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp) + } + fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8.0f\n", s.ID, s.Status, desc, actions, s.ErrorsTotal) + } + + sb.WriteString("\n================================================================================\n") + return sb.String() +} + +func renderSummaryMarkdown(summary *SummaryReport) string { + var sb strings.Builder + + emoji := "✅" + title := "ALL TESTS PASSED" + if summary.FailCount > 0 { + emoji = "❌" + title = fmt.Sprintf("%d TEST(S) FAILED", summary.FailCount) + } else if summary.TotalCount == 0 { + emoji = "⚠️" + title = "NO RESULTS" + } + + sb.WriteString(fmt.Sprintf("## %s Load Test Results: %s\n\n", emoji, title)) + + if summary.TestType == "quick" { + sb.WriteString("> 🚀 **Quick Test** (S1, S4, S6) — Use `/loadtest` for full suite\n\n") + } + + passRate := 0 + if summary.TotalCount > 0 { + passRate = summary.PassCount * 100 / summary.TotalCount + } + sb.WriteString(fmt.Sprintf("**%d/%d passed** (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate)) + + sb.WriteString("| | Scenario | Description | Actions | Errors |\n") + sb.WriteString("|:-:|:--------:|-------------|:-------:|:------:|\n") + + for _, s := range summary.Scenarios { + icon := "✅" + if s.Status != "PASS" { + icon = "❌" + } + + desc := s.Description + if len(desc) > 45 { + desc = desc[:42] + "..." + } + + actions := fmt.Sprintf("%.0f", s.ActionTotal) + if s.ActionExp > 0 { + actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp) + } + + errors := fmt.Sprintf("%.0f", s.ErrorsTotal) + if s.ErrorsTotal > 0 { + errors = fmt.Sprintf("⚠️ %.0f", s.ErrorsTotal) + } + + sb.WriteString(fmt.Sprintf("| %s | **%s** | %s | %s | %s |\n", icon, s.ID, desc, actions, errors)) + } + + sb.WriteString("\n📦 **[Download detailed results](../artifacts)**\n") + + return sb.String() +} diff --git a/test/loadtest/internal/prometheus/prometheus.go b/test/loadtest/internal/prometheus/prometheus.go new file mode 100644 index 0000000..b9bf755 --- /dev/null +++ b/test/loadtest/internal/prometheus/prometheus.go @@ -0,0 +1,429 @@ +// Package prometheus provides Prometheus deployment and querying functionality. +package prometheus + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// Manager handles Prometheus operations. +type Manager struct { + manifestPath string + portForward *exec.Cmd + localPort int + kubeContext string +} + +// NewManager creates a new Prometheus manager. +func NewManager(manifestPath string) *Manager { + return &Manager{ + manifestPath: manifestPath, + localPort: 9091, + } +} + +// NewManagerWithPort creates a Prometheus manager with a custom port. +func NewManagerWithPort(manifestPath string, port int, kubeContext string) *Manager { + return &Manager{ + manifestPath: manifestPath, + localPort: port, + kubeContext: kubeContext, + } +} + +// kubectl returns kubectl args with optional context +func (m *Manager) kubectl(args ...string) []string { + if m.kubeContext != "" { + return append([]string{"--context", m.kubeContext}, args...) + } + return args +} + +// Deploy deploys Prometheus to the cluster. 
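+// The monitoring namespace is created idempotently by piping a client-side
+// dry-run through apply, roughly the equivalent of:
+//
+//	kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
+//
+// after which the manifest is applied and the pod is waited on for readiness.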
+func (m *Manager) Deploy(ctx context.Context) error { + cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("create", "namespace", "monitoring", "--dry-run=client", "-o", "yaml")...) + out, err := cmd.Output() + if err != nil { + return fmt.Errorf("generating namespace yaml: %w", err) + } + + applyCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", "-")...) + applyCmd.Stdin = strings.NewReader(string(out)) + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying namespace: %w", err) + } + + applyCmd = exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", m.manifestPath)...) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying prometheus manifest: %w", err) + } + + fmt.Println("Waiting for Prometheus to be ready...") + waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod", + "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...) + waitCmd.Stdout = os.Stdout + waitCmd.Stderr = os.Stderr + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for prometheus: %w", err) + } + + return nil +} + +// StartPortForward starts port-forwarding to Prometheus. +func (m *Manager) StartPortForward(ctx context.Context) error { + m.StopPortForward() + + m.portForward = exec.CommandContext(ctx, "kubectl", m.kubectl("port-forward", + "-n", "monitoring", "svc/prometheus", fmt.Sprintf("%d:9090", m.localPort))...) + + if err := m.portForward.Start(); err != nil { + return fmt.Errorf("starting port-forward: %w", err) + } + + for i := 0; i < 30; i++ { + time.Sleep(time.Second) + if m.isAccessible() { + fmt.Printf("Prometheus accessible at http://localhost:%d\n", m.localPort) + return nil + } + } + + return fmt.Errorf("prometheus port-forward not ready after 30s") +} + +// StopPortForward stops the port-forward process. +func (m *Manager) StopPortForward() { + if m.portForward != nil && m.portForward.Process != nil { + m.portForward.Process.Kill() + m.portForward = nil + } + exec.Command("pkill", "-f", fmt.Sprintf("kubectl port-forward.*prometheus.*%d", m.localPort)).Run() +} + +// Reset restarts Prometheus to clear all metrics. +func (m *Manager) Reset(ctx context.Context) error { + m.StopPortForward() + + cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("delete", "pod", "-n", "monitoring", + "-l", "app=prometheus", "--grace-period=0", "--force")...) + cmd.Run() + + fmt.Println("Waiting for Prometheus to restart...") + waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod", + "-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...) + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for prometheus restart: %w", err) + } + + if err := m.StartPortForward(ctx); err != nil { + return err + } + + fmt.Println("Waiting 5s for Prometheus to initialize scraping...") + time.Sleep(5 * time.Second) + + return nil +} + +func (m *Manager) isAccessible() bool { + conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", m.localPort), 2*time.Second) + if err != nil { + return false + } + conn.Close() + + resp, err := http.Get(fmt.Sprintf("http://localhost:%d/api/v1/status/config", m.localPort)) + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == 200 +} + +// URL returns the local Prometheus URL. 
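+// With the default manager this is http://localhost:9091; parallel workers
+// are offset to 9091+workerID (see setupWorker in internal/cmd/run.go).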
+func (m *Manager) URL() string { + return fmt.Sprintf("http://localhost:%d", m.localPort) +} + +// WaitForTarget waits for a specific job to be scraped by Prometheus. +func (m *Manager) WaitForTarget(ctx context.Context, job string, timeout time.Duration) error { + fmt.Printf("Waiting for Prometheus to discover and scrape job '%s'...\n", job) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if m.isTargetHealthy(job) { + fmt.Printf("Prometheus is scraping job '%s'\n", job) + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(2 * time.Second): + } + } + + m.printTargetStatus(job) + return fmt.Errorf("timeout waiting for Prometheus to scrape job '%s'", job) +} + +// isTargetHealthy checks if a job has at least one healthy target. +func (m *Manager) isTargetHealthy(job string) bool { + resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL())) + if err != nil { + return false + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + + var result struct { + Status string `json:"status"` + Data struct { + ActiveTargets []struct { + Labels map[string]string `json:"labels"` + Health string `json:"health"` + } `json:"activeTargets"` + } `json:"data"` + } + + if err := json.Unmarshal(body, &result); err != nil { + return false + } + + for _, target := range result.Data.ActiveTargets { + if target.Labels["job"] == job && target.Health == "up" { + return true + } + } + return false +} + +// printTargetStatus prints debug info about targets. +func (m *Manager) printTargetStatus(job string) { + resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL())) + if err != nil { + fmt.Printf("Failed to get targets: %v\n", err) + return + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + + var result struct { + Data struct { + ActiveTargets []struct { + Labels map[string]string `json:"labels"` + Health string `json:"health"` + LastError string `json:"lastError"` + ScrapeURL string `json:"scrapeUrl"` + } `json:"activeTargets"` + } `json:"data"` + } + + if err := json.Unmarshal(body, &result); err != nil { + fmt.Printf("Failed to parse targets: %v\n", err) + return + } + + fmt.Printf("Prometheus targets for job '%s':\n", job) + found := false + for _, target := range result.Data.ActiveTargets { + if target.Labels["job"] == job { + found = true + fmt.Printf(" - %s: health=%s, lastError=%s\n", + target.ScrapeURL, target.Health, target.LastError) + } + } + if !found { + fmt.Printf(" No targets found for job '%s'\n", job) + fmt.Printf(" Available jobs: ") + jobs := make(map[string]bool) + for _, target := range result.Data.ActiveTargets { + jobs[target.Labels["job"]] = true + } + for j := range jobs { + fmt.Printf("%s ", j) + } + fmt.Println() + } +} + +// HasMetrics checks if the specified job has any metrics available. +func (m *Manager) HasMetrics(ctx context.Context, job string) bool { + query := fmt.Sprintf(`up{job="%s"}`, job) + result, err := m.Query(ctx, query) + if err != nil { + return false + } + return len(result.Data.Result) > 0 && result.Data.Result[0].Value[1] == "1" +} + +// QueryResponse represents a Prometheus query response. +type QueryResponse struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric map[string]string `json:"metric"` + Value []interface{} `json:"value"` + } `json:"result"` + } `json:"data"` +} + +// Query executes a PromQL query and returns the response. 
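+// A minimal usage sketch (the query string is illustrative):
+//
+//	resp, err := m.Query(ctx, `sum(reloader_action_total{job="reloader-new"})`)
+//	if err == nil && len(resp.Data.Result) > 0 {
+//		fmt.Println(resp.Data.Result[0].Value[1]) // sample value as a string
+//	}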
+func (m *Manager) Query(ctx context.Context, query string) (*QueryResponse, error) { + u := fmt.Sprintf("%s/api/v1/query?query=%s", m.URL(), url.QueryEscape(query)) + + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, err + } + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("querying prometheus: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading response: %w", err) + } + + var result QueryResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("parsing response: %w", err) + } + + return &result, nil +} + +// CollectMetrics collects all metrics for a scenario and writes to output directory. +func (m *Manager) CollectMetrics(ctx context.Context, job, outputDir, scenario string) error { + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("creating output directory: %w", err) + } + + timeRange := "10m" + + // For S6 (restart scenario), use increase() to handle counter resets + useIncrease := scenario == "S6" + + counterMetrics := []string{ + "reloader_reconcile_total", + "reloader_action_total", + "reloader_skipped_total", + "reloader_errors_total", + "reloader_events_received_total", + "reloader_workloads_scanned_total", + "reloader_workloads_matched_total", + "reloader_reload_executed_total", + } + + for _, metric := range counterMetrics { + var query string + if useIncrease { + query = fmt.Sprintf(`sum(increase(%s{job="%s"}[%s])) by (success, reason)`, metric, job, timeRange) + } else { + query = fmt.Sprintf(`sum(%s{job="%s"}) by (success, reason)`, metric, job) + } + + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, metric+".json")); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", metric, err) + } + } + + histogramMetrics := []struct { + name string + prefix string + }{ + {"reloader_reconcile_duration_seconds", "reconcile"}, + {"reloader_action_latency_seconds", "action"}, + } + + for _, hm := range histogramMetrics { + for _, pct := range []int{50, 95, 99} { + quantile := float64(pct) / 100 + query := fmt.Sprintf(`histogram_quantile(%v, sum(rate(%s_bucket{job="%s"}[%s])) by (le))`, + quantile, hm.name, job, timeRange) + outFile := filepath.Join(outputDir, fmt.Sprintf("%s_p%d.json", hm.prefix, pct)) + if err := m.queryAndSave(ctx, query, outFile); err != nil { + fmt.Printf("Warning: failed to collect %s p%d: %v\n", hm.name, pct, err) + } + } + } + + restQueries := map[string]string{ + "rest_client_requests_total.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s"})`, job), + "rest_client_requests_get.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="GET"})`, job), + "rest_client_requests_patch.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PATCH"})`, job), + "rest_client_requests_put.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PUT"})`, job), + "rest_client_requests_errors.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",code=~"[45].."}) or vector(0)`, job), + } + + for filename, query := range restQueries { + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", filename, err) + } + } + + resourceQueries := map[string]string{ + "memory_rss_bytes_avg.json": fmt.Sprintf(`avg_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, 
timeRange), + "memory_rss_bytes_max.json": fmt.Sprintf(`max_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange), + "memory_rss_bytes_cur.json": fmt.Sprintf(`process_resident_memory_bytes{job="%s"}`, job), + + "memory_heap_bytes_avg.json": fmt.Sprintf(`avg_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange), + "memory_heap_bytes_max.json": fmt.Sprintf(`max_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange), + + "cpu_usage_cores_avg.json": fmt.Sprintf(`rate(process_cpu_seconds_total{job="%s"}[%s])`, job, timeRange), + "cpu_usage_cores_max.json": fmt.Sprintf(`max_over_time(rate(process_cpu_seconds_total{job="%s"}[1m])[%s:1m])`, job, timeRange), + + "goroutines_avg.json": fmt.Sprintf(`avg_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange), + "goroutines_max.json": fmt.Sprintf(`max_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange), + "goroutines_cur.json": fmt.Sprintf(`go_goroutines{job="%s"}`, job), + + "gc_duration_seconds_p99.json": fmt.Sprintf(`histogram_quantile(0.99, sum(rate(go_gc_duration_seconds_bucket{job="%s"}[%s])) by (le))`, job, timeRange), + + "threads_cur.json": fmt.Sprintf(`go_threads{job="%s"}`, job), + } + + for filename, query := range resourceQueries { + if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil { + fmt.Printf("Warning: failed to collect %s: %v\n", filename, err) + } + } + + return nil +} + +func (m *Manager) queryAndSave(ctx context.Context, query, outputPath string) error { + result, err := m.Query(ctx, query) + if err != nil { + emptyResult := `{"status":"success","data":{"resultType":"vector","result":[]}}` + return os.WriteFile(outputPath, []byte(emptyResult), 0644) + } + + data, err := json.MarshalIndent(result, "", " ") + if err != nil { + return err + } + + return os.WriteFile(outputPath, data, 0644) +} diff --git a/test/loadtest/internal/reloader/reloader.go b/test/loadtest/internal/reloader/reloader.go new file mode 100644 index 0000000..2667cd4 --- /dev/null +++ b/test/loadtest/internal/reloader/reloader.go @@ -0,0 +1,271 @@ +package reloader + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +// Config holds configuration for a Reloader deployment. +type Config struct { + Version string + Image string + Namespace string + ReloadStrategy string +} + +// Manager handles Reloader deployment operations. +type Manager struct { + config Config + kubeContext string +} + +// NewManager creates a new Reloader manager. +func NewManager(config Config) *Manager { + return &Manager{ + config: config, + } +} + +// SetKubeContext sets the kubeconfig context to use. +func (m *Manager) SetKubeContext(kubeContext string) { + m.kubeContext = kubeContext +} + +// kubectl returns kubectl command with optional context. +func (m *Manager) kubectl(ctx context.Context, args ...string) *exec.Cmd { + if m.kubeContext != "" { + args = append([]string{"--context", m.kubeContext}, args...) + } + return exec.CommandContext(ctx, "kubectl", args...) +} + +// namespace returns the namespace for this reloader instance. +func (m *Manager) namespace() string { + if m.config.Namespace != "" { + return m.config.Namespace + } + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// releaseName returns the release name for this instance. +func (m *Manager) releaseName() string { + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// Job returns the Prometheus job name for this Reloader instance. 
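+// For example, a Manager with Version "new" returns "reloader-new"; the
+// Prometheus scrape configuration is expected to surface this as the job label.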
+func (m *Manager) Job() string { + return fmt.Sprintf("reloader-%s", m.config.Version) +} + +// Deploy deploys Reloader to the cluster using raw manifests. +func (m *Manager) Deploy(ctx context.Context) error { + ns := m.namespace() + name := m.releaseName() + + fmt.Printf("Deploying Reloader (%s) with image %s...\n", m.config.Version, m.config.Image) + + manifest := m.buildManifest(ns, name) + + applyCmd := m.kubectl(ctx, "apply", "-f", "-") + applyCmd.Stdin = strings.NewReader(manifest) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("applying manifest: %w", err) + } + + fmt.Printf("Waiting for Reloader deployment to be ready...\n") + waitCmd := m.kubectl(ctx, "rollout", "status", "deployment", name, + "-n", ns, + "--timeout=120s") + waitCmd.Stdout = os.Stdout + waitCmd.Stderr = os.Stderr + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("waiting for deployment: %w", err) + } + + time.Sleep(2 * time.Second) + + fmt.Printf("Reloader (%s) deployed successfully\n", m.config.Version) + return nil +} + +// buildManifest creates the raw Kubernetes manifest for Reloader. +func (m *Manager) buildManifest(ns, name string) string { + var args []string + args = append(args, "--log-format=json") + if m.config.ReloadStrategy != "" && m.config.ReloadStrategy != "default" { + args = append(args, fmt.Sprintf("--reload-strategy=%s", m.config.ReloadStrategy)) + } + + argsYAML := "" + if len(args) > 0 { + argsYAML = " args:\n" + for _, arg := range args { + argsYAML += fmt.Sprintf(" - %q\n", arg) + } + } + + return fmt.Sprintf(`--- +apiVersion: v1 +kind: Namespace +metadata: + name: %[1]s +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: %[2]s + namespace: %[1]s +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: %[2]s +rules: +- apiGroups: ["*"] + resources: ["*"] + verbs: ["*"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: %[2]s +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: %[2]s +subjects: +- kind: ServiceAccount + name: %[2]s + namespace: %[1]s +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: %[2]s + namespace: %[1]s + labels: + app: %[2]s + app.kubernetes.io/name: reloader + loadtest-version: %[3]s +spec: + replicas: 1 + selector: + matchLabels: + app: %[2]s + template: + metadata: + labels: + app: %[2]s + app.kubernetes.io/name: reloader + loadtest-version: %[3]s + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: %[2]s + securityContext: + runAsNonRoot: true + runAsUser: 65534 + containers: + - name: reloader + image: %[4]s + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 9090 +%[5]s resources: + requests: + cpu: 10m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true +`, ns, name, m.config.Version, m.config.Image, argsYAML) +} + +// Cleanup removes all Reloader resources from the cluster. 
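+// Deletion is best-effort: the deployment, ClusterRole, and binding are
+// removed with --ignore-not-found, and the namespace is deleted without
+// waiting so the next run can proceed immediately.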
+func (m *Manager) Cleanup(ctx context.Context) error { + ns := m.namespace() + name := m.releaseName() + + delDeploy := m.kubectl(ctx, "delete", "deployment", name, "-n", ns, "--ignore-not-found") + delDeploy.Run() + + delCRB := m.kubectl(ctx, "delete", "clusterrolebinding", name, "--ignore-not-found") + delCRB.Run() + + delCR := m.kubectl(ctx, "delete", "clusterrole", name, "--ignore-not-found") + delCR.Run() + + delNS := m.kubectl(ctx, "delete", "namespace", ns, "--wait=false", "--ignore-not-found") + if err := delNS.Run(); err != nil { + return fmt.Errorf("deleting namespace: %w", err) + } + + return nil +} + +// CleanupByVersion removes Reloader resources for a specific version without needing a Manager instance. +// This is useful for cleaning up from previous runs before creating a new Manager. +func CleanupByVersion(ctx context.Context, version, kubeContext string) { + ns := fmt.Sprintf("reloader-%s", version) + name := fmt.Sprintf("reloader-%s", version) + + nsArgs := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"} + crArgs := []string{"delete", "clusterrole", name, "--ignore-not-found"} + crbArgs := []string{"delete", "clusterrolebinding", name, "--ignore-not-found"} + + if kubeContext != "" { + nsArgs = append([]string{"--context", kubeContext}, nsArgs...) + crArgs = append([]string{"--context", kubeContext}, crArgs...) + crbArgs = append([]string{"--context", kubeContext}, crbArgs...) + } + + exec.CommandContext(ctx, "kubectl", nsArgs...).Run() + exec.CommandContext(ctx, "kubectl", crArgs...).Run() + exec.CommandContext(ctx, "kubectl", crbArgs...).Run() +} + +// CollectLogs collects logs from the Reloader pod and writes them to the specified file. +func (m *Manager) CollectLogs(ctx context.Context, logPath string) error { + ns := m.namespace() + name := m.releaseName() + + if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil { + return fmt.Errorf("creating log directory: %w", err) + } + + cmd := m.kubectl(ctx, "logs", + "-n", ns, + "-l", fmt.Sprintf("app=%s", name), + "--tail=-1") + + out, err := cmd.Output() + if err != nil { + cmd = m.kubectl(ctx, "logs", + "-n", ns, + "-l", "app.kubernetes.io/name=reloader", + "--tail=-1") + out, err = cmd.Output() + if err != nil { + return fmt.Errorf("collecting logs: %w", err) + } + } + + if err := os.WriteFile(logPath, out, 0644); err != nil { + return fmt.Errorf("writing logs: %w", err) + } + + return nil +} diff --git a/test/loadtest/internal/scenarios/scenarios.go b/test/loadtest/internal/scenarios/scenarios.go new file mode 100644 index 0000000..4909feb --- /dev/null +++ b/test/loadtest/internal/scenarios/scenarios.go @@ -0,0 +1,2037 @@ +// Package scenarios contains all load test scenario implementations. +package scenarios + +import ( + "context" + "encoding/json" + "fmt" + "log" + "math/rand" + "os" + "path/filepath" + "sync" + "time" + + "github.com/stakater/Reloader/test/loadtest/internal/reloader" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/kubernetes" +) + +// ExpectedMetrics holds the expected values for metrics verification. 
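+// A sketch of the expected.json a scenario writes (values are illustrative):
+//
+//	{"action_total": 50, "reload_executed_total": 50, "reconcile_total": 0,
+//	 "workloads_scanned_total": 0, "workloads_matched_total": 50,
+//	 "skipped_total": 0, "description": "S1: 50 burst updates, ..."}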
+type ExpectedMetrics struct { + ActionTotal int `json:"action_total"` + ReloadExecutedTotal int `json:"reload_executed_total"` + ReconcileTotal int `json:"reconcile_total"` + WorkloadsScannedTotal int `json:"workloads_scanned_total"` + WorkloadsMatchedTotal int `json:"workloads_matched_total"` + SkippedTotal int `json:"skipped_total"` + Description string `json:"description"` +} + +// Runner defines the interface for test scenarios. +type Runner interface { + Name() string + Description() string + Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) +} + +// Registry holds all available test scenarios. +var Registry = map[string]Runner{ + "S1": &BurstUpdateScenario{}, + "S2": &FanOutScenario{}, + "S3": &HighCardinalityScenario{}, + "S4": &NoOpUpdateScenario{}, + "S5": &WorkloadChurnScenario{}, + "S6": &ControllerRestartScenario{}, + "S7": &APIPressureScenario{}, + "S8": &LargeObjectScenario{}, + "S9": &MultiWorkloadTypeScenario{}, + "S10": &SecretsAndMixedScenario{}, + "S11": &AnnotationStrategyScenario{}, + "S12": &PauseResumeScenario{}, + "S13": &ComplexReferencesScenario{}, +} + +// WriteExpectedMetrics writes expected metrics to a JSON file. +func WriteExpectedMetrics(scenario, resultsDir string, expected ExpectedMetrics) error { + if resultsDir == "" { + return nil + } + + dir := filepath.Join(resultsDir, scenario) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating results directory: %w", err) + } + + data, err := json.MarshalIndent(expected, "", " ") + if err != nil { + return fmt.Errorf("marshaling expected metrics: %w", err) + } + + path := filepath.Join(dir, "expected.json") + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("writing expected metrics: %w", err) + } + + log.Printf("Expected metrics written to %s", path) + return nil +} + +// BurstUpdateScenario - Many ConfigMap/Secret updates in quick succession. 
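+// Each successful ConfigMap update targets exactly one deployment, so
+// action_total and reload_executed_total are both expected to equal the
+// number of updates performed.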
+type BurstUpdateScenario struct{} + +func (s *BurstUpdateScenario) Name() string { return "S1" } +func (s *BurstUpdateScenario) Description() string { return "Burst ConfigMap/Secret updates" } + +func (s *BurstUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S1: Creating base ConfigMaps and Deployments...") + + const numConfigMaps = 10 + const numDeployments = 10 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("burst-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("burst-deploy-%d", i), namespace, fmt.Sprintf("burst-cm-%d", i)) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S1: Starting burst updates...") + + updateCount := 0 + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S1: Context cancelled, completed %d burst updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + WorkloadsMatchedTotal: updateCount, + Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("burst-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("value-%d-%d", updateCount, time.Now().UnixNano()) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S1: Completed %d burst updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + WorkloadsMatchedTotal: updateCount, + Description: fmt.Sprintf("S1: %d burst updates, each triggers 1 deployment reload", updateCount), + }, nil +} + +// FanOutScenario - One ConfigMap used by many workloads. 
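+// With one shared ConfigMap referenced by N deployments, every update should
+// fan out to N reloads, so the expected totals scale as updates × N.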
+type FanOutScenario struct{} + +func (s *FanOutScenario) Name() string { return "S2" } +func (s *FanOutScenario) Description() string { return "Fan-out (one CM -> many workloads)" } + +func (s *FanOutScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S2: Creating shared ConfigMap and multiple Deployments...") + + const numDeployments = 50 + setupCtx := context.Background() + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared-cm", + Namespace: namespace, + }, + Data: map[string]string{"config": "initial"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err) + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("fanout-deploy-%d", i), namespace, "shared-cm") + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment %d: %v", i, err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S2: Updating shared ConfigMap...") + + if ctx.Err() != nil { + log.Printf("S2: WARNING - Context already done before update loop: %v", ctx.Err()) + } + if deadline, ok := ctx.Deadline(); ok { + remaining := time.Until(deadline) + log.Printf("S2: Context deadline in %v", remaining) + if remaining < 10*time.Second { + log.Printf("S2: WARNING - Very little time remaining on context!") + } + } else { + log.Println("S2: Context has no deadline") + } + + updateCount := 0 + ticker := time.NewTicker(5 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + log.Printf("S2: Will run updates for %v (duration=%v)", duration-5*time.Second, duration) + + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + expectedActions := updateCount * numDeployments + log.Printf("S2: Context done (err=%v), completed %d fan-out updates", ctx.Err(), updateCount) + return ExpectedMetrics{ + ActionTotal: expectedActions, + ReloadExecutedTotal: expectedActions, + WorkloadsScannedTotal: expectedActions, + WorkloadsMatchedTotal: expectedActions, + Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions), + }, nil + case <-ticker.C: + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "shared-cm", metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["config"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update shared ConfigMap: %v", err) + } else { + updateCount++ + log.Printf("S2: Updated shared ConfigMap (should trigger %d reloads)", numDeployments) + } + } + } + + expectedActions := updateCount * numDeployments + log.Printf("S2: Completed %d fan-out updates, expected %d total actions", updateCount, expectedActions) + return ExpectedMetrics{ + ActionTotal: expectedActions, + ReloadExecutedTotal: expectedActions, + WorkloadsScannedTotal: expectedActions, + WorkloadsMatchedTotal: expectedActions, + Description: fmt.Sprintf("S2: %d updates × %d deployments = %d expected reloads", updateCount, numDeployments, expectedActions), + }, nil +} + +// 
HighCardinalityScenario - Many ConfigMaps/Secrets across many namespaces. +type HighCardinalityScenario struct{} + +func (s *HighCardinalityScenario) Name() string { return "S3" } +func (s *HighCardinalityScenario) Description() string { + return "High cardinality (many CMs, many namespaces)" +} + +func (s *HighCardinalityScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S3: Creating high cardinality resources...") + + setupCtx := context.Background() + + namespaces := []string{namespace} + for i := 0; i < 10; i++ { + ns := fmt.Sprintf("%s-%d", namespace, i) + if _, err := client.CoreV1().Namespaces().Create(setupCtx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: ns}, + }, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create namespace %s: %v", ns, err) + } else { + namespaces = append(namespaces, ns) + } + } + + for _, ns := range namespaces { + for i := 0; i < 20; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("hc-cm-%d", i), + Namespace: ns, + }, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(ns).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("hc-deploy-%d", i), ns, fmt.Sprintf("hc-cm-%d", i)) + client.AppsV1().Deployments(ns).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + } + + if err := waitForAllNamespacesReady(setupCtx, client, namespaces, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S3: Starting random updates across namespaces...") + + updateDuration := duration - 5*time.Second + if updateDuration < 30*time.Second { + updateDuration = 30 * time.Second + } + + updateCount := 0 + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration) + defer updateCancel() + + endTime := time.Now().Add(updateDuration) + log.Printf("S3: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05")) + + for time.Now().Before(endTime) { + select { + case <-updateCtx.Done(): + log.Printf("S3: Completed %d high cardinality updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)), + }, nil + case <-ticker.C: + ns := namespaces[rand.Intn(len(namespaces))] + cmIndex := rand.Intn(20) + cm, err := client.CoreV1().ConfigMaps(ns).Get(setupCtx, fmt.Sprintf("hc-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(ns).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + + log.Printf("S3: Completed %d high cardinality updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S3: %d updates across %d namespaces", updateCount, len(namespaces)), + }, nil +} + +// NoOpUpdateScenario - Updates that don't actually change data. 
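+// Only object metadata (an annotation counter) changes here; .data never does,
+// so Reloader should skip every event and ActionTotal stays at zero.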
+type NoOpUpdateScenario struct{} + +func (s *NoOpUpdateScenario) Name() string { return "S4" } +func (s *NoOpUpdateScenario) Description() string { return "No-op updates (same data)" } + +func (s *NoOpUpdateScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S4: Creating ConfigMaps and Deployments for no-op test...") + + setupCtx := context.Background() + + for i := 0; i < 10; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("noop-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "static-value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("noop-deploy-%d", i), namespace, fmt.Sprintf("noop-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S4: Starting no-op updates (annotation changes only)...") + + updateCount := 0 + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S4: Completed %d no-op updates", updateCount) + return ExpectedMetrics{ + ActionTotal: 0, + ReloadExecutedTotal: 0, + SkippedTotal: updateCount, + Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(10) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("noop-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + if cm.Annotations == nil { + cm.Annotations = make(map[string]string) + } + cm.Annotations["noop-counter"] = fmt.Sprintf("%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + + log.Printf("S4: Completed %d no-op updates (should see 0 actions)", updateCount) + return ExpectedMetrics{ + ActionTotal: 0, + ReloadExecutedTotal: 0, + SkippedTotal: updateCount, + Description: fmt.Sprintf("S4: %d no-op updates, all should be skipped", updateCount), + }, nil +} + +// WorkloadChurnScenario - Deployments created and deleted rapidly. 
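+// Two goroutines run concurrently: one creates a Deployment every 500ms and
+// deletes the one created ten iterations earlier (keeping ~10 alive), while
+// the other updates the shared ConfigMap every 2s. Expectations stay
+// descriptive because reload counts depend on which Deployments exist at the
+// moment of each update.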
+type WorkloadChurnScenario struct{} + +func (s *WorkloadChurnScenario) Name() string { return "S5" } +func (s *WorkloadChurnScenario) Description() string { return "Workload churn (rapid create/delete)" } + +func (s *WorkloadChurnScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S5: Creating base ConfigMap...") + + setupCtx := context.Background() + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "churn-cm", Namespace: namespace}, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + + log.Println("S5: Starting workload churn...") + + var wg sync.WaitGroup + var mu sync.Mutex + deployCounter := 0 + deleteCounter := 0 + cmUpdateCount := 0 + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return + case <-ticker.C: + deployName := fmt.Sprintf("churn-deploy-%d", deployCounter) + deploy := createDeployment(deployName, namespace, "churn-cm") + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err == nil { + mu.Lock() + deployCounter++ + mu.Unlock() + } + if deployCounter > 10 { + oldName := fmt.Sprintf("churn-deploy-%d", deployCounter-10) + if err := client.AppsV1().Deployments(namespace).Delete(setupCtx, oldName, metav1.DeleteOptions{}); err == nil { + mu.Lock() + deleteCounter++ + mu.Unlock() + } + } + } + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return + case <-ticker.C: + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "churn-cm", metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d", cmUpdateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + mu.Lock() + cmUpdateCount++ + mu.Unlock() + } + } + } + }() + + wg.Wait() + log.Printf("S5: Created %d, deleted %d deployments, %d CM updates", deployCounter, deleteCounter, cmUpdateCount) + + return ExpectedMetrics{ + Description: fmt.Sprintf("S5: Churn test - %d deploys created, %d deleted, %d CM updates, ~10 active deploys at any time", deployCounter, deleteCounter, cmUpdateCount), + }, nil +} + +// ControllerRestartScenario - Restart controller under load. 
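+// Updates run for the full duration; 20s in, the Reloader pod (matched by the
+// app=reloader label in the reloader-<version> namespace) is deleted so the
+// replacement pod must resync mid-load. Expectations stay descriptive since
+// events can be missed or double-counted across the restart window.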
+type ControllerRestartScenario struct { + ReloaderVersion string +} + +func (s *ControllerRestartScenario) Name() string { return "S6" } +func (s *ControllerRestartScenario) Description() string { + return "Controller restart under load" +} + +func (s *ControllerRestartScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S6: Creating resources and generating load...") + + setupCtx := context.Background() + + for i := 0; i < 20; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("restart-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("restart-deploy-%d", i), namespace, fmt.Sprintf("restart-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + var wg sync.WaitGroup + var mu sync.Mutex + updateCount := 0 + + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return + case <-ticker.C: + cmIndex := rand.Intn(20) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("restart-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + mu.Lock() + updateCount++ + mu.Unlock() + } + } + } + }() + + reloaderNS := fmt.Sprintf("reloader-%s", s.ReloaderVersion) + if s.ReloaderVersion == "" { + reloaderNS = "reloader-new" + } + + log.Println("S6: Waiting 20 seconds before restarting controller...") + time.Sleep(20 * time.Second) + + log.Println("S6: Restarting Reloader pod...") + pods, err := client.CoreV1().Pods(reloaderNS).List(setupCtx, metav1.ListOptions{ + LabelSelector: "app=reloader", + }) + if err == nil && len(pods.Items) > 0 { + client.CoreV1().Pods(reloaderNS).Delete(setupCtx, pods.Items[0].Name, metav1.DeleteOptions{}) + } + + wg.Wait() + log.Printf("S6: Controller restart scenario completed with %d updates", updateCount) + return ExpectedMetrics{ + Description: fmt.Sprintf("S6: Restart test - %d updates during restart", updateCount), + }, nil +} + +// APIPressureScenario - Simulate API server pressure with many concurrent requests. 
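+// Ten goroutines each tick every 100ms against 50 ConfigMaps, giving a
+// combined update rate approaching ~100/s; the scenario stresses client-side
+// and API-server contention more than any precise reload count.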
+type APIPressureScenario struct{} + +func (s *APIPressureScenario) Name() string { return "S7" } +func (s *APIPressureScenario) Description() string { return "API pressure (many concurrent requests)" } + +func (s *APIPressureScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S7: Creating resources for API pressure test...") + + const numConfigMaps = 50 + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("api-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "value"}, + } + client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}) + deploy := createDeployment(fmt.Sprintf("api-deploy-%d", i), namespace, fmt.Sprintf("api-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 5*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S7: Starting concurrent updates from multiple goroutines...") + + updateDuration := duration - 5*time.Second + if updateDuration < 30*time.Second { + updateDuration = 30 * time.Second + } + + updateCtx, updateCancel := context.WithTimeout(context.Background(), updateDuration) + defer updateCancel() + + endTime := time.Now().Add(updateDuration) + log.Printf("S7: Will run updates for %v (until %v)", updateDuration, endTime.Format("15:04:05")) + + var wg sync.WaitGroup + var mu sync.Mutex + totalUpdates := 0 + + for g := 0; g < 10; g++ { + wg.Add(1) + go func(goroutineID int) { + defer wg.Done() + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + updateCount := 0 + for time.Now().Before(endTime) { + select { + case <-updateCtx.Done(): + return + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("api-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("g%d-update-%d", goroutineID, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + updateCount++ + } + } + } + mu.Lock() + totalUpdates += updateCount + mu.Unlock() + log.Printf("S7: Goroutine %d completed %d updates", goroutineID, updateCount) + }(g) + } + + wg.Wait() + log.Printf("S7: API pressure scenario completed with %d total updates", totalUpdates) + return ExpectedMetrics{ + ActionTotal: totalUpdates, + ReloadExecutedTotal: totalUpdates, + Description: fmt.Sprintf("S7: %d concurrent updates from 10 goroutines", totalUpdates), + }, nil +} + +// LargeObjectScenario - Large ConfigMaps/Secrets. 
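+// Each ConfigMap carries two ~100KB keys (~200KB per object) and one key is
+// rewritten every 2s. The intent is to stress handling of large watch payloads
+// (and, presumably, hashing of large data blobs) rather than event volume.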
+type LargeObjectScenario struct{} + +func (s *LargeObjectScenario) Name() string { return "S8" } +func (s *LargeObjectScenario) Description() string { return "Large ConfigMaps/Secrets (>100KB)" } + +func (s *LargeObjectScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S8: Creating large ConfigMaps...") + + setupCtx := context.Background() + + largeData := make([]byte, 100*1024) + for i := range largeData { + largeData[i] = byte('a' + (i % 26)) + } + largeValue := string(largeData) + + for i := 0; i < 10; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("large-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "large-key-1": largeValue, + "large-key-2": largeValue, + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create large ConfigMap %d: %v", i, err) + } + deploy := createDeployment(fmt.Sprintf("large-deploy-%d", i), namespace, fmt.Sprintf("large-cm-%d", i)) + client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}) + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S8: Starting large object updates...") + + updateCount := 0 + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + log.Printf("S8: Completed %d large object updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount), + }, nil + case <-ticker.C: + cmIndex := rand.Intn(10) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("large-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["large-key-1"] = largeValue[:len(largeValue)-10] + fmt.Sprintf("-%d", updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update large ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S8: Completed %d large object updates", updateCount) + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S8: %d large object (100KB) updates", updateCount), + }, nil +} + +func waitForDeploymentsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all deployments in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + deployments, err := client.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list deployments: %w", err) + } + + allReady := true + notReady := 0 + for _, d := range deployments.Items { + if d.Status.ReadyReplicas < *d.Spec.Replicas { + allReady = false + notReady++ + } + } + + if allReady && len(deployments.Items) > 0 { + log.Printf("All %d deployments in %s are ready", len(deployments.Items), namespace) + return nil + } + + log.Printf("Waiting for deployments: %d/%d not ready yet...", notReady, len(deployments.Items)) + time.Sleep(5 * 
time.Second) + } + + return fmt.Errorf("timeout waiting for deployments to be ready") +} + +func waitForAllNamespacesReady(ctx context.Context, client kubernetes.Interface, namespaces []string, timeout time.Duration) error { + log.Printf("Waiting for deployments in %d namespaces to be ready...", len(namespaces)) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + allReady := true + totalDeploys := 0 + notReady := 0 + + for _, ns := range namespaces { + deployments, err := client.AppsV1().Deployments(ns).List(ctx, metav1.ListOptions{}) + if err != nil { + continue + } + for _, d := range deployments.Items { + totalDeploys++ + if d.Status.ReadyReplicas < *d.Spec.Replicas { + allReady = false + notReady++ + } + } + } + + if allReady && totalDeploys > 0 { + log.Printf("All %d deployments across %d namespaces are ready", totalDeploys, len(namespaces)) + return nil + } + + log.Printf("Waiting: %d/%d deployments not ready yet...", notReady, totalDeploys) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for deployments to be ready") +} + +func createDeployment(name, namespace, configMapName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDeploymentWithSecret(name, namespace, secretName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: 
metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + SecretRef: &corev1.SecretEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDeploymentWithBoth(name, namespace, configMapName, secretName string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + { + SecretRef: &corev1.SecretEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +// SecretsAndMixedScenario - Tests Secrets and mixed ConfigMap+Secret workloads. 
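+// Updates alternate between Secrets and ConfigMaps. With 5 secret-only deploys
+// over 5 Secrets plus 2 mixed deploys, an average Secret update should trigger
+// 5/5 + 2/5 = 1.4 reloads, and an average ConfigMap update 3/5 + 2/5 = 1.0;
+// calculateExpected derives its totals from exactly these averages.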
+type SecretsAndMixedScenario struct{} + +func (s *SecretsAndMixedScenario) Name() string { return "S10" } +func (s *SecretsAndMixedScenario) Description() string { + return "Secrets and mixed ConfigMap+Secret workloads" +} + +func (s *SecretsAndMixedScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S10: Creating Secrets, ConfigMaps, and mixed workloads...") + + const numSecrets = 5 + const numConfigMaps = 5 + const numSecretOnlyDeploys = 5 + const numConfigMapOnlyDeploys = 3 + const numMixedDeploys = 2 + + setupCtx := context.Background() + + for i := 0; i < numSecrets; i++ { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mixed-secret-%d", i), + Namespace: namespace, + }, + StringData: map[string]string{ + "password": fmt.Sprintf("initial-secret-%d", i), + }, + } + if _, err := client.CoreV1().Secrets(namespace).Create(setupCtx, secret, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Secret %s: %v", secret.Name, err) + } + } + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mixed-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "config": fmt.Sprintf("initial-config-%d", i), + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numSecretOnlyDeploys; i++ { + deploy := createDeploymentWithSecret( + fmt.Sprintf("secret-only-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-secret-%d", i%numSecrets), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Secret-only Deployment: %v", err) + } + } + + for i := 0; i < numConfigMapOnlyDeploys; i++ { + deploy := createDeployment( + fmt.Sprintf("cm-only-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-cm-%d", i%numConfigMaps), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap-only Deployment: %v", err) + } + } + + for i := 0; i < numMixedDeploys; i++ { + deploy := createDeploymentWithBoth( + fmt.Sprintf("mixed-deploy-%d", i), + namespace, + fmt.Sprintf("mixed-cm-%d", i%numConfigMaps), + fmt.Sprintf("mixed-secret-%d", i%numSecrets), + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create mixed Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S10: Starting alternating Secret and ConfigMap updates...") + + secretUpdateCount := 0 + cmUpdateCount := 0 + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + updateSecret := true + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecrets, numConfigMaps, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil + case <-ticker.C: + if updateSecret { + secretIndex := rand.Intn(numSecrets) + secret, err := client.CoreV1().Secrets(namespace).Get(setupCtx, fmt.Sprintf("mixed-secret-%d", 
secretIndex), metav1.GetOptions{}) + if err != nil { + continue + } + secret.StringData = map[string]string{ + "password": fmt.Sprintf("updated-secret-%d-%d", secretIndex, secretUpdateCount), + } + if _, err := client.CoreV1().Secrets(namespace).Update(setupCtx, secret, metav1.UpdateOptions{}); err == nil { + secretUpdateCount++ + } + } else { + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("mixed-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, cmUpdateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err == nil { + cmUpdateCount++ + } + } + updateSecret = !updateSecret + } + } + + log.Printf("S10: Completed %d Secret updates and %d ConfigMap updates", secretUpdateCount, cmUpdateCount) + return s.calculateExpected(secretUpdateCount, cmUpdateCount, numSecrets, numConfigMaps, numSecretOnlyDeploys, numConfigMapOnlyDeploys, numMixedDeploys), nil +} + +func (s *SecretsAndMixedScenario) calculateExpected(secretUpdates, cmUpdates, numSecrets, numConfigMaps, secretOnlyDeploys, cmOnlyDeploys, mixedDeploys int) ExpectedMetrics { + avgSecretReloads := float64(secretOnlyDeploys)/float64(numSecrets) + float64(mixedDeploys)/float64(numSecrets) + secretTriggeredReloads := int(float64(secretUpdates) * avgSecretReloads) + + avgCMReloads := float64(cmOnlyDeploys)/float64(numConfigMaps) + float64(mixedDeploys)/float64(numConfigMaps) + cmTriggeredReloads := int(float64(cmUpdates) * avgCMReloads) + + totalExpectedReloads := secretTriggeredReloads + cmTriggeredReloads + + return ExpectedMetrics{ + ActionTotal: totalExpectedReloads, + ReloadExecutedTotal: totalExpectedReloads, + Description: fmt.Sprintf("S10: %d Secret updates (→%d reloads, avg %.1f/update) + %d CM updates (→%d reloads, avg %.1f/update) = %d total", + secretUpdates, secretTriggeredReloads, avgSecretReloads, cmUpdates, cmTriggeredReloads, avgCMReloads, totalExpectedReloads), + } +} + +// MultiWorkloadTypeScenario - Tests all supported workload types with a shared ConfigMap. 
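+// Five Deployments, three StatefulSets, and two DaemonSets all reference one
+// shared ConfigMap, so each update is expected to reload all 10 workloads.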
+type MultiWorkloadTypeScenario struct{}
+
+func (s *MultiWorkloadTypeScenario) Name() string { return "S9" }
+func (s *MultiWorkloadTypeScenario) Description() string {
+	return "Multi-workload types (Deploy, StatefulSet, DaemonSet)"
+}
+
+func (s *MultiWorkloadTypeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) {
+	log.Println("S9: Creating shared ConfigMap and multiple workload types...")
+
+	const numDeployments = 5
+	const numStatefulSets = 3
+	const numDaemonSets = 2
+
+	setupCtx := context.Background()
+
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "multi-type-cm",
+			Namespace: namespace,
+		},
+		Data: map[string]string{"config": "initial"},
+	}
+	if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil {
+		return ExpectedMetrics{}, fmt.Errorf("failed to create shared ConfigMap: %w", err)
+	}
+
+	for i := 0; i < numDeployments; i++ {
+		deploy := createDeployment(fmt.Sprintf("multi-deploy-%d", i), namespace, "multi-type-cm")
+		if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil {
+			log.Printf("Failed to create Deployment %d: %v", i, err)
+		}
+	}
+
+	for i := 0; i < numStatefulSets; i++ {
+		sts := createStatefulSet(fmt.Sprintf("multi-sts-%d", i), namespace, "multi-type-cm")
+		if _, err := client.AppsV1().StatefulSets(namespace).Create(setupCtx, sts, metav1.CreateOptions{}); err != nil {
+			log.Printf("Failed to create StatefulSet %d: %v", i, err)
+		}
+	}
+
+	for i := 0; i < numDaemonSets; i++ {
+		ds := createDaemonSet(fmt.Sprintf("multi-ds-%d", i), namespace, "multi-type-cm")
+		if _, err := client.AppsV1().DaemonSets(namespace).Create(setupCtx, ds, metav1.CreateOptions{}); err != nil {
+			log.Printf("Failed to create DaemonSet %d: %v", i, err)
+		}
+	}
+
+	if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+		log.Printf("Warning: %v - continuing anyway", err)
+	}
+	if err := waitForStatefulSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+		log.Printf("Warning: %v - continuing anyway", err)
+	}
+	if err := waitForDaemonSetsReady(setupCtx, client, namespace, 3*time.Minute); err != nil {
+		log.Printf("Warning: %v - continuing anyway", err)
+	}
+
+	log.Println("S9: Starting ConfigMap updates to trigger reloads on all workload types...")
+
+	updateCount := 0
+	ticker := time.NewTicker(5 * time.Second)
+	defer ticker.Stop()
+
+	endTime := time.Now().Add(duration - 5*time.Second)
+	for time.Now().Before(endTime) {
+		select {
+		case <-ctx.Done():
+			return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil
+		case <-ticker.C:
+			cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, "multi-type-cm", metav1.GetOptions{})
+			if err != nil {
+				continue
+			}
+			cm.Data["config"] = fmt.Sprintf("update-%d", updateCount)
+			if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil {
+				log.Printf("Failed to update shared ConfigMap: %v", err)
+			} else {
+				updateCount++
+				log.Printf("S9: Updated shared ConfigMap (update #%d)", updateCount)
+			}
+		}
+	}
+
+	log.Printf("S9: Completed %d ConfigMap updates", updateCount)
+	return s.calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets), nil
+}
+
+func (s *MultiWorkloadTypeScenario) calculateExpected(updateCount, numDeployments, numStatefulSets, numDaemonSets int) 
ExpectedMetrics { + totalWorkloads := numDeployments + numStatefulSets + numDaemonSets + expectedReloads := updateCount * totalWorkloads + + return ExpectedMetrics{ + ActionTotal: expectedReloads, + ReloadExecutedTotal: expectedReloads, + WorkloadsMatchedTotal: expectedReloads, + Description: fmt.Sprintf("S9: %d CM updates × %d workloads (%d Deploys + %d STS + %d DS) = %d reloads", + updateCount, totalWorkloads, numDeployments, numStatefulSets, numDaemonSets, expectedReloads), + } +} + +func createStatefulSet(name, namespace, configMapName string) *appsv1.StatefulSet { + replicas := int32(1) + terminationGracePeriod := int64(0) + + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &replicas, + ServiceName: name, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func createDaemonSet(name, namespace, configMapName string) *appsv1.DaemonSet { + terminationGracePeriod := int64(0) + + return &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Tolerations: []corev1.Toleration{ + { + Key: "node-role.kubernetes.io/control-plane", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + { + Key: "node-role.kubernetes.io/master", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }, + }, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +func waitForStatefulSetsReady(ctx context.Context, client 
kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all StatefulSets in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + stsList, err := client.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list StatefulSets: %w", err) + } + + if len(stsList.Items) == 0 { + log.Printf("No StatefulSets found in %s", namespace) + return nil + } + + allReady := true + notReady := 0 + for _, sts := range stsList.Items { + if sts.Status.ReadyReplicas < *sts.Spec.Replicas { + allReady = false + notReady++ + } + } + + if allReady { + log.Printf("All %d StatefulSets in %s are ready", len(stsList.Items), namespace) + return nil + } + + log.Printf("Waiting for StatefulSets: %d/%d not ready yet...", notReady, len(stsList.Items)) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for StatefulSets to be ready") +} + +func waitForDaemonSetsReady(ctx context.Context, client kubernetes.Interface, namespace string, timeout time.Duration) error { + log.Printf("Waiting for all DaemonSets in %s to be ready (timeout: %v)...", namespace, timeout) + + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + dsList, err := client.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list DaemonSets: %w", err) + } + + if len(dsList.Items) == 0 { + log.Printf("No DaemonSets found in %s", namespace) + return nil + } + + allReady := true + notReady := 0 + for _, ds := range dsList.Items { + if ds.Status.NumberReady < ds.Status.DesiredNumberScheduled { + allReady = false + notReady++ + } + } + + if allReady { + log.Printf("All %d DaemonSets in %s are ready", len(dsList.Items), namespace) + return nil + } + + log.Printf("Waiting for DaemonSets: %d/%d not ready yet...", notReady, len(dsList.Items)) + time.Sleep(5 * time.Second) + } + + return fmt.Errorf("timeout waiting for DaemonSets to be ready") +} + +// ComplexReferencesScenario - Tests init containers, valueFrom, and projected volumes. 
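+// Each ConfigMap is the primary reference for one Deployment and the
+// secondary (projected-volume) reference for another, so one update should
+// reload about two Deployments; calculateExpected multiplies updates by that
+// factor.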
+type ComplexReferencesScenario struct{} + +func (s *ComplexReferencesScenario) Name() string { return "S13" } +func (s *ComplexReferencesScenario) Description() string { + return "Complex references (init containers, valueFrom, projected volumes)" +} + +func (s *ComplexReferencesScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S13: Creating ConfigMaps and complex deployments with various reference types...") + + const numConfigMaps = 5 + const numDeployments = 5 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("complex-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{ + "key1": fmt.Sprintf("value1-%d", i), + "key2": fmt.Sprintf("value2-%d", i), + "config": fmt.Sprintf("config-%d", i), + }, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + primaryCM := fmt.Sprintf("complex-cm-%d", i) + secondaryCM := fmt.Sprintf("complex-cm-%d", (i+1)%numConfigMaps) + + deploy := createComplexDeployment( + fmt.Sprintf("complex-deploy-%d", i), + namespace, + primaryCM, + secondaryCM, + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create complex Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S13: Starting ConfigMap updates to test all reference types...") + + updateCount := 0 + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("complex-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key1"] = fmt.Sprintf("updated-value1-%d-%d", cmIndex, updateCount) + cm.Data["config"] = fmt.Sprintf("updated-config-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + log.Printf("S13: Updated complex-cm-%d (update #%d)", cmIndex, updateCount) + } + } + } + + log.Printf("S13: Completed %d ConfigMap updates", updateCount) + return s.calculateExpected(updateCount, numConfigMaps, numDeployments), nil +} + +func (s *ComplexReferencesScenario) calculateExpected(updateCount, numConfigMaps, numDeployments int) ExpectedMetrics { + expectedReloadsPerUpdate := 2 + expectedReloads := updateCount * expectedReloadsPerUpdate + + return ExpectedMetrics{ + ActionTotal: expectedReloads, + ReloadExecutedTotal: expectedReloads, + Description: fmt.Sprintf("S13: %d CM updates × ~%d affected deploys = ~%d reloads (init containers, valueFrom, volumes, projected)", + updateCount, expectedReloadsPerUpdate, expectedReloads), + } +} + +// PauseResumeScenario - Tests pause-period functionality under rapid updates. 
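+// ConfigMap updates arrive every 2s while every Deployment carries a 15s
+// pause-period annotation, so actual reloads should be far fewer than updates;
+// only a descriptive expectation is returned because batching depends on
+// timing.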
+type PauseResumeScenario struct{} + +func (s *PauseResumeScenario) Name() string { return "S12" } +func (s *PauseResumeScenario) Description() string { + return "Pause & Resume (rapid updates with pause-period)" +} + +func (s *PauseResumeScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + log.Println("S12: Creating ConfigMaps and Deployments with pause-period annotation...") + + const numConfigMaps = 10 + const numDeployments = 10 + const pausePeriod = 15 * time.Second + const updateInterval = 2 * time.Second + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pause-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeploymentWithPause( + fmt.Sprintf("pause-deploy-%d", i), + namespace, + fmt.Sprintf("pause-cm-%d", i), + pausePeriod, + ) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Printf("S12: Starting rapid ConfigMap updates (every %v) with %v pause-period...", updateInterval, pausePeriod) + + updateCount := 0 + ticker := time.NewTicker(updateInterval) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 5*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("pause-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + } + } + + log.Printf("S12: Completed %d rapid updates (pause-period should reduce actual reloads)", updateCount) + return s.calculateExpected(updateCount, duration, updateInterval, pausePeriod), nil +} + +func (s *PauseResumeScenario) calculateExpected(updateCount int, duration, updateInterval, pausePeriod time.Duration) ExpectedMetrics { + + // This is an approximation - the actual value depends on random distribution + expectedCycles := int(duration / pausePeriod) + if expectedCycles < 1 { + expectedCycles = 1 + } + + return ExpectedMetrics{ + Description: fmt.Sprintf("S12: %d updates with %v pause-period (expect ~%d reload cycles, actual reloads << updates)", + updateCount, pausePeriod, expectedCycles), + } +} + +// AnnotationStrategyScenario - Tests annotation-based reload strategy. +// This scenario deploys its own Reloader instance with --reload-strategy=annotations. 
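+// With this strategy Reloader patches the pod template annotation
+// reloader.stakater.com/last-reloaded-from instead of injecting an env var, so
+// the scenario verifies success by counting Deployments that carry that
+// annotation.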
+type AnnotationStrategyScenario struct { + Image string +} + +func (s *AnnotationStrategyScenario) Name() string { return "S11" } +func (s *AnnotationStrategyScenario) Description() string { + return "Annotation reload strategy (--reload-strategy=annotations)" +} + +func (s *AnnotationStrategyScenario) Run(ctx context.Context, client kubernetes.Interface, namespace string, duration time.Duration) (ExpectedMetrics, error) { + if s.Image == "" { + return ExpectedMetrics{}, fmt.Errorf("S11 requires Image to be set (use the same image as --new-image)") + } + + log.Println("S11: Deploying Reloader with --reload-strategy=annotations...") + + reloaderNS := "reloader-s11" + mgr := reloader.NewManager(reloader.Config{ + Version: "s11", + Image: s.Image, + Namespace: reloaderNS, + ReloadStrategy: "annotations", + }) + + if err := mgr.Deploy(ctx); err != nil { + return ExpectedMetrics{}, fmt.Errorf("deploying S11 reloader: %w", err) + } + + defer func() { + log.Println("S11: Cleaning up S11-specific Reloader...") + cleanupCtx := context.Background() + if err := mgr.Cleanup(cleanupCtx); err != nil { + log.Printf("Warning: failed to cleanup S11 reloader: %v", err) + } + }() + + log.Println("S11: Creating ConfigMaps and Deployments...") + + const numConfigMaps = 10 + const numDeployments = 10 + + setupCtx := context.Background() + + for i := 0; i < numConfigMaps; i++ { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("annot-cm-%d", i), + Namespace: namespace, + }, + Data: map[string]string{"key": "initial-value"}, + } + if _, err := client.CoreV1().ConfigMaps(namespace).Create(setupCtx, cm, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create ConfigMap %s: %v", cm.Name, err) + } + } + + for i := 0; i < numDeployments; i++ { + deploy := createDeployment(fmt.Sprintf("annot-deploy-%d", i), namespace, fmt.Sprintf("annot-cm-%d", i)) + if _, err := client.AppsV1().Deployments(namespace).Create(setupCtx, deploy, metav1.CreateOptions{}); err != nil { + log.Printf("Failed to create Deployment: %v", err) + } + } + + if err := waitForDeploymentsReady(setupCtx, client, namespace, 3*time.Minute); err != nil { + log.Printf("Warning: %v - continuing anyway", err) + } + + log.Println("S11: Starting ConfigMap updates with annotation strategy...") + + updateCount := 0 + annotationUpdatesSeen := 0 + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + endTime := time.Now().Add(duration - 10*time.Second) + for time.Now().Before(endTime) { + select { + case <-ctx.Done(): + return s.calculateExpected(updateCount, annotationUpdatesSeen), nil + case <-ticker.C: + cmIndex := rand.Intn(numConfigMaps) + cm, err := client.CoreV1().ConfigMaps(namespace).Get(setupCtx, fmt.Sprintf("annot-cm-%d", cmIndex), metav1.GetOptions{}) + if err != nil { + continue + } + cm.Data["key"] = fmt.Sprintf("update-%d-%d", cmIndex, updateCount) + if _, err := client.CoreV1().ConfigMaps(namespace).Update(setupCtx, cm, metav1.UpdateOptions{}); err != nil { + log.Printf("Failed to update ConfigMap: %v", err) + } else { + updateCount++ + } + + if updateCount%10 == 0 { + deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", cmIndex), metav1.GetOptions{}) + if err == nil { + if _, hasAnnotation := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; hasAnnotation { + annotationUpdatesSeen++ + } + } + } + } + } + + log.Println("S11: Verifying annotation-based reload...") + time.Sleep(5 * time.Second) + + 
deploysWithAnnotation := 0 + for i := 0; i < numDeployments; i++ { + deploy, err := client.AppsV1().Deployments(namespace).Get(setupCtx, fmt.Sprintf("annot-deploy-%d", i), metav1.GetOptions{}) + if err != nil { + continue + } + if deploy.Spec.Template.Annotations != nil { + if _, ok := deploy.Spec.Template.Annotations["reloader.stakater.com/last-reloaded-from"]; ok { + deploysWithAnnotation++ + } + } + } + + log.Printf("S11: Completed %d updates, %d deployments have reload annotation", updateCount, deploysWithAnnotation) + return s.calculateExpected(updateCount, deploysWithAnnotation), nil +} + +func (s *AnnotationStrategyScenario) calculateExpected(updateCount, deploysWithAnnotation int) ExpectedMetrics { + return ExpectedMetrics{ + ActionTotal: updateCount, + ReloadExecutedTotal: updateCount, + Description: fmt.Sprintf("S11: %d updates with annotation strategy, %d deployments received annotation", + updateCount, deploysWithAnnotation), + } +} + +func createDeploymentWithPause(name, namespace, configMapName string, pausePeriod time.Duration) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + // Deployment-specific pause-period annotation + "deployment.reloader.stakater.com/pause-period": fmt.Sprintf("%ds", int(pausePeriod.Seconds())), + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + Containers: []corev1.Container{ + { + Name: "app", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: configMapName, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } +} + +// createComplexDeployment creates a deployment with multiple ConfigMap reference types. 
+// - Init container using envFrom +// - Main container using env.valueFrom.configMapKeyRef +// - Sidecar container using volume mount +// - Projected volume combining multiple ConfigMaps +func createComplexDeployment(name, namespace, primaryCM, secondaryCM string) *appsv1.Deployment { + replicas := int32(1) + maxSurge := intstr.FromInt(1) + maxUnavailable := intstr.FromInt(1) + terminationGracePeriod := int64(0) + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{ + "reloader.stakater.com/auto": "true", + }, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxSurge: &maxSurge, + MaxUnavailable: &maxUnavailable, + }, + }, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"app": name}, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: &terminationGracePeriod, + InitContainers: []corev1.Container{ + { + Name: "init", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "echo Init done"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + EnvFrom: []corev1.EnvFromSource{ + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: primaryCM, + }, + }, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "main", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + Env: []corev1.EnvVar{ + { + Name: "CONFIG_KEY1", + ValueFrom: &corev1.EnvVarSource{ + ConfigMapKeyRef: &corev1.ConfigMapKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: primaryCM, + }, + Key: "key1", + }, + }, + }, + { + Name: "CONFIG_KEY2", + ValueFrom: &corev1.EnvVarSource{ + ConfigMapKeyRef: &corev1.ConfigMapKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: primaryCM, + }, + Key: "key2", + }, + }, + }, + }, + }, + { + Name: "sidecar", + Image: "gcr.io/google-containers/busybox:1.27", + Command: []string{"sh", "-c", "sleep 999999999"}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1m"), + corev1.ResourceMemory: resource.MustParse("4Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("10m"), + corev1.ResourceMemory: resource.MustParse("16Mi"), + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "config-volume", + MountPath: "/etc/config", + }, + { + Name: "projected-volume", + MountPath: "/etc/projected", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "config-volume", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + 
diff --git a/test/loadtest/manifests/prometheus.yaml b/test/loadtest/manifests/prometheus.yaml
new file mode 100644
index 0000000..f826f52
--- /dev/null
+++ b/test/loadtest/manifests/prometheus.yaml
@@ -0,0 +1,181 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-config
+  namespace: monitoring
+data:
+  prometheus.yml: |
+    global:
+      scrape_interval: 2s
+      evaluation_interval: 2s
+
+    scrape_configs:
+      - job_name: 'prometheus'
+        static_configs:
+          - targets: ['localhost:9090']
+
+      - job_name: 'reloader-old'
+        kubernetes_sd_configs:
+          - role: pod
+            namespaces:
+              names:
+                - reloader-old
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
+
+      - job_name: 'reloader-new'
+        kubernetes_sd_configs:
+          - role: pod
+            namespaces:
+              names:
+                - reloader-new
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
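+# NOTE (illustrative assumption): the relabel rules above only keep pods that
+# opt in via the conventional Prometheus annotations, so the Reloader pods
+# deployed by the harness are expected to carry something like:
+#
+#   metadata:
+#     annotations:
+#       prometheus.io/scrape: "true"
+#       prometheus.io/port: "9090"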
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: prometheus
+  namespace: monitoring
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources:
+      - configmaps
+    verbs: ["get"]
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+  - kind: ServiceAccount
+    name: prometheus
+    namespace: monitoring
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      serviceAccountName: prometheus
+      containers:
+        - name: prometheus
+          image: quay.io/prometheus/prometheus:v2.47.0
+          args:
+            - --config.file=/etc/prometheus/prometheus.yml
+            - --storage.tsdb.path=/prometheus
+            - --web.console.libraries=/usr/share/prometheus/console_libraries
+            - --web.console.templates=/usr/share/prometheus/consoles
+            - --web.enable-lifecycle
+          ports:
+            - containerPort: 9090
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus
+            - name: data
+              mountPath: /prometheus
+          resources:
+            limits:
+              cpu: 1000m
+              memory: 1Gi
+            requests:
+              cpu: 200m
+              memory: 512Mi
+          readinessProbe:
+            httpGet:
+              path: /-/ready
+              port: 9090
+            initialDelaySeconds: 5
+            periodSeconds: 5
+          livenessProbe:
+            httpGet:
+              path: /-/healthy
+              port: 9090
+            initialDelaySeconds: 10
+            periodSeconds: 10
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus-config
+        - name: data
+          emptyDir: {}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  selector:
+    app: prometheus
+  ports:
+    - port: 9090
+      targetPort: 9090
+  type: NodePort
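+# Example (illustrative only): once the stack is running, Reloader counters can
+# be spot-checked from the host. The metric name below is an assumption about
+# Reloader's exporter, matching the ReloadExecutedTotal expectation in the Go
+# scenarios:
+#
+#   kubectl -n monitoring port-forward svc/prometheus 9090:9090 &
+#   curl -s 'http://localhost:9090/api/v1/query' \
+#     --data-urlencode 'query=sum(reloader_reload_executed_total)'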