Compare commits

..

45 Commits

Author SHA1 Message Date
Muhammad Safwan Karim
9aa957c839 Merge pull request #1096 from stakater/chart/release-1.4.13
Bump chart and version for 1.4.13
2026-02-14 00:12:29 +05:00
Safwan
20e2680539 Bump chart and version 2026-02-13 21:44:45 +05:00
Felix Tonnvik
f51b62dee9 Merge pull request #1094 from stakater/fix/bump-go-ver
Bump go version to 1.26
2026-02-13 14:26:45 +01:00
Safwan
f776e2dfd0 fixed errors 2026-02-13 17:05:29 +05:00
Safwan
003fbbfa1f Bump go version to 1.26 2026-02-13 15:53:18 +05:00
Felix Tonnvik
8834ab097d Merge pull request #1082 from stakater/edition-variable
Add Edition variable in metadata
2026-02-13 11:27:52 +01:00
Safwan
3ee87d3725 resolved comments 2026-01-20 16:26:48 +05:00
Safwan
5c3593fb1c revert values.yaml 2026-01-13 00:59:51 +05:00
Safwan
8537502bbd revert extra changes 2026-01-13 00:41:12 +05:00
Safwan
fd5f03adfb disable testing temporarily 2026-01-13 00:24:39 +05:00
Safwan
32899e1983 login to registry 2026-01-13 00:17:22 +05:00
Safwan
6c15e5db24 test by pushing images 2026-01-12 23:29:16 +05:00
Safwan
4a95a813cd Add Edition variable in metadata 2026-01-12 23:02:26 +05:00
iLLeniumStudios
3dd2741102 Merge pull request #1074 from TheiLLeniumStudios/test/loadtests
feat: Add load test framework with observability metrics
2026-01-09 16:15:35 +01:00
TheiLLeniumStudios
16ff7f6ac9 fix: Default reconcile metric result to error for panic safety 2026-01-09 15:27:19 +01:00
TheiLLeniumStudios
1be910749b chore: A lot of cleanup 2026-01-09 01:34:16 +01:00
TheiLLeniumStudios
1945a740d0 chore: Format files# 2026-01-09 01:22:24 +01:00
TheiLLeniumStudios
07f7365d63 ci: Enable tests for PR again 2026-01-09 01:14:45 +01:00
TheiLLeniumStudios
ad6013adbf fix: Treat missing metrics as info 2026-01-09 00:58:28 +01:00
TheiLLeniumStudios
a132ed8dea ci: Don't comment on forked PRs automatically 2026-01-09 00:48:05 +01:00
TheiLLeniumStudios
2674f405ce fix: Issue with not parsing multiple scenario in args 2026-01-09 00:44:43 +01:00
TheiLLeniumStudios
e56323d582 ci: Disable tests in PR 2026-01-09 00:37:34 +01:00
TheiLLeniumStudios
c4f3255c78 ci: Disable tests temporarily 2026-01-09 00:34:25 +01:00
TheiLLeniumStudios
2442eddd81 fix: Skip loading images when already done 2026-01-09 00:34:09 +01:00
TheiLLeniumStudios
76287e0420 refactor: Cleanup logic for reloader in loadtests 2026-01-08 23:36:02 +01:00
TheiLLeniumStudios
322c4bc130 feat: Use cobra for loadtest CLI commands 2026-01-08 23:26:41 +01:00
TheiLLeniumStudios
958c6c2be7 feat(ci): Separate action for loadtests 2026-01-08 22:52:07 +01:00
TheiLLeniumStudios
922cac120a fix: Update gitignore to include results and nfs files 2026-01-08 22:15:21 +01:00
TheiLLeniumStudios
b945e5e828 chore: Merge from master 2026-01-08 22:12:19 +01:00
Faizan Ahmad
1652c62775 Merge pull request #1076 from stakater/csi-support-without-SHA512
Feature - Csi support without sha512
2026-01-08 18:14:04 +01:00
TheiLLeniumStudios
193f64c0ec fix: Missing reloader.go 2026-01-08 14:22:43 +01:00
TheiLLeniumStudios
f7210204d4 fix: Safe parsing for duration 2026-01-08 11:41:44 +01:00
TheiLLeniumStudios
eb3bc2447e refactor(upgrade.go): simplify retryOnConflict to return matched status and error 2026-01-08 11:12:10 +01:00
TheiLLeniumStudios
27f49ecc0f fix: Use go 1.25 for load tests in CI 2026-01-08 11:08:13 +01:00
faizanahmad055
8373b1e76c Merge branch 'master' of github.com:stakater/Reloader into csi-support-without-SHA512 2026-01-08 00:33:27 +01:00
TheiLLeniumStudios
a419b07e02 Merge branch 'master' of github.com:TheiLLeniumStudios/Reloader into test/loadtests 2026-01-07 13:45:27 +01:00
Faizan Ahmad
fdd2474b3f Merge pull request #1077 from stakater/bugfix/fix-ubi-image
Fix ubi image build failure
2026-01-07 13:41:02 +01:00
faizanahmad055
4c0883b4cf Fix ubi image build failure
Signed-off-by: faizanahmad055 <faizan.ahmad55@outlook.com>
2026-01-07 12:45:04 +01:00
faizanahmad055
157cf0f2e4 Remove SHA1 changes
Signed-off-by: faizanahmad055 <faizan.ahmad55@outlook.com>
2026-01-07 12:13:04 +01:00
faizanahmad055
6fd7c8254a Update filtering in UBI image
Signed-off-by: faizanahmad055 <faizan.ahmad55@outlook.com>
2026-01-07 10:28:38 +01:00
faizanahmad055
703319e732 Improve file filtering in UBI docker image
Signed-off-by: faizanahmad055 <faizan.ahmad55@outlook.com>
2026-01-07 09:27:29 +01:00
faizanahmad055
b0ca635e49 Add file filtering in UBI docker image
Signed-off-by: faizanahmad055 <faizan.ahmad55@outlook.com>
2026-01-07 09:11:48 +01:00
TheiLLeniumStudios
5b63610f4f ci: Remove manual_dispatch from loadtests 2026-01-06 11:47:23 +01:00
TheiLLeniumStudios
512278d740 ci: Allow manual trigger 2026-01-06 11:06:44 +01:00
TheiLLeniumStudios
9a3edf13d2 feat: Load tests 2026-01-06 11:03:26 +01:00
46 changed files with 7023 additions and 288 deletions

.github/actions/loadtest/action.yml

@@ -0,0 +1,267 @@
name: 'Reloader Load Test'
description: 'Run Reloader load tests with A/B comparison support'
inputs:
old-ref:
description: 'Git ref for "old" version (optional, enables A/B comparison)'
required: false
default: ''
new-ref:
description: 'Git ref for "new" version (defaults to current checkout)'
required: false
default: ''
old-image:
description: 'Pre-built container image for "old" version (alternative to old-ref)'
required: false
default: ''
new-image:
description: 'Pre-built container image for "new" version (alternative to new-ref)'
required: false
default: ''
scenarios:
description: 'Scenarios to run: S1,S4,S6 or all'
required: false
default: 'S1,S4,S6'
test-type:
description: 'Test type label for summary: quick or full'
required: false
default: 'quick'
duration:
description: 'Test duration in seconds'
required: false
default: '60'
kind-cluster:
description: 'Name of existing Kind cluster (if empty, creates new one)'
required: false
default: ''
post-comment:
description: 'Post results as PR comment'
required: false
default: 'false'
pr-number:
description: 'PR number for commenting (required if post-comment is true)'
required: false
default: ''
github-token:
description: 'GitHub token for posting comments'
required: false
default: ${{ github.token }}
comment-header:
description: 'Optional header text for the comment'
required: false
default: ''
outputs:
status:
description: 'Overall test status: pass or fail'
value: ${{ steps.run.outputs.status }}
summary:
description: 'Markdown summary of results'
value: ${{ steps.summary.outputs.summary }}
pass-count:
description: 'Number of passed scenarios'
value: ${{ steps.summary.outputs.pass_count }}
fail-count:
description: 'Number of failed scenarios'
value: ${{ steps.summary.outputs.fail_count }}
runs:
using: 'composite'
steps:
- name: Determine images to use
id: images
shell: bash
run: |
# Determine old image
if [ -n "${{ inputs.old-image }}" ]; then
echo "old=${{ inputs.old-image }}" >> $GITHUB_OUTPUT
elif [ -n "${{ inputs.old-ref }}" ]; then
echo "old=localhost/reloader:old" >> $GITHUB_OUTPUT
echo "build_old=true" >> $GITHUB_OUTPUT
else
echo "old=" >> $GITHUB_OUTPUT
fi
# Determine new image
if [ -n "${{ inputs.new-image }}" ]; then
echo "new=${{ inputs.new-image }}" >> $GITHUB_OUTPUT
elif [ -n "${{ inputs.new-ref }}" ]; then
echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT
echo "build_new=true" >> $GITHUB_OUTPUT
else
# Default: build from current checkout
echo "new=localhost/reloader:new" >> $GITHUB_OUTPUT
echo "build_new_current=true" >> $GITHUB_OUTPUT
fi
- name: Build old image from ref
if: steps.images.outputs.build_old == 'true'
shell: bash
run: |
CURRENT_SHA=$(git rev-parse HEAD)
git checkout ${{ inputs.old-ref }}
docker build -t localhost/reloader:old .
echo "Built old image from ref: ${{ inputs.old-ref }}"
git checkout $CURRENT_SHA
- name: Build new image from ref
if: steps.images.outputs.build_new == 'true'
shell: bash
run: |
CURRENT_SHA=$(git rev-parse HEAD)
git checkout ${{ inputs.new-ref }}
docker build -t localhost/reloader:new .
echo "Built new image from ref: ${{ inputs.new-ref }}"
git checkout $CURRENT_SHA
- name: Build new image from current checkout
if: steps.images.outputs.build_new_current == 'true'
shell: bash
run: |
docker build -t localhost/reloader:new .
echo "Built new image from current checkout"
- name: Build loadtest binary
shell: bash
run: |
cd ${{ github.workspace }}/test/loadtest
go build -o loadtest ./cmd/loadtest
- name: Determine cluster name
id: cluster
shell: bash
run: |
if [ -n "${{ inputs.kind-cluster }}" ]; then
echo "name=${{ inputs.kind-cluster }}" >> $GITHUB_OUTPUT
echo "skip=true" >> $GITHUB_OUTPUT
else
echo "name=reloader-loadtest" >> $GITHUB_OUTPUT
echo "skip=false" >> $GITHUB_OUTPUT
fi
- name: Load images into Kind
shell: bash
run: |
CLUSTER="${{ steps.cluster.outputs.name }}"
if [ -n "${{ steps.images.outputs.old }}" ]; then
echo "Loading old image: ${{ steps.images.outputs.old }}"
kind load docker-image "${{ steps.images.outputs.old }}" --name "$CLUSTER" || true
fi
echo "Loading new image: ${{ steps.images.outputs.new }}"
kind load docker-image "${{ steps.images.outputs.new }}" --name "$CLUSTER" || true
- name: Run load tests
id: run
shell: bash
run: |
cd ${{ github.workspace }}/test/loadtest
ARGS="--new-image=${{ steps.images.outputs.new }}"
ARGS="$ARGS --scenario=${{ inputs.scenarios }}"
ARGS="$ARGS --duration=${{ inputs.duration }}"
ARGS="$ARGS --cluster-name=${{ steps.cluster.outputs.name }}"
ARGS="$ARGS --skip-image-load"
if [ -n "${{ steps.images.outputs.old }}" ]; then
ARGS="$ARGS --old-image=${{ steps.images.outputs.old }}"
fi
if [ "${{ steps.cluster.outputs.skip }}" = "true" ]; then
ARGS="$ARGS --skip-cluster"
fi
echo "Running: ./loadtest run $ARGS"
if ./loadtest run $ARGS; then
echo "status=pass" >> $GITHUB_OUTPUT
else
echo "status=fail" >> $GITHUB_OUTPUT
fi
- name: Generate summary
id: summary
shell: bash
run: |
cd ${{ github.workspace }}/test/loadtest
# Generate markdown summary
./loadtest summary \
--results-dir=./results \
--test-type=${{ inputs.test-type }} \
--format=markdown > summary.md 2>/dev/null || true
# Output to GitHub Step Summary
cat summary.md >> $GITHUB_STEP_SUMMARY
# Store summary for output (using heredoc for multiline)
{
echo 'summary<<EOF'
cat summary.md
echo 'EOF'
} >> $GITHUB_OUTPUT
# Get pass/fail counts from JSON
COUNTS=$(./loadtest summary --format=json 2>/dev/null | head -20 || echo '{}')
echo "pass_count=$(echo "$COUNTS" | grep -o '"pass_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT
echo "fail_count=$(echo "$COUNTS" | grep -o '"fail_count": [0-9]*' | grep -o '[0-9]*' || echo 0)" >> $GITHUB_OUTPUT
- name: Post PR comment
if: inputs.post-comment == 'true' && inputs.pr-number != ''
continue-on-error: true
uses: actions/github-script@v7
with:
github-token: ${{ inputs.github-token }}
script: |
const fs = require('fs');
const summaryPath = '${{ github.workspace }}/test/loadtest/summary.md';
let summary = 'No results available';
try {
summary = fs.readFileSync(summaryPath, 'utf8');
} catch (e) {
console.log('Could not read summary file:', e.message);
}
const header = '${{ inputs.comment-header }}';
const status = '${{ steps.run.outputs.status }}';
const statusEmoji = status === 'pass' ? ':white_check_mark:' : ':x:';
const body = [
header ? header : `## ${statusEmoji} Load Test Results (${{ inputs.test-type }})`,
'',
summary,
'',
'---',
`**Artifacts:** [Download](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})`,
].join('\n');
try {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: ${{ inputs.pr-number }},
body: body
});
console.log('Comment posted successfully');
} catch (error) {
if (error.status === 403) {
console.log('Could not post comment (fork PR with restricted permissions). Use /loadtest command to run with comment posting.');
} else {
throw error;
}
}
- name: Upload results
uses: actions/upload-artifact@v4
if: always()
with:
name: loadtest-${{ inputs.test-type }}-results
path: |
${{ github.workspace }}/test/loadtest/results/
retention-days: 30
- name: Cleanup Kind cluster (only if we created it)
if: always() && steps.cluster.outputs.skip == 'false'
shell: bash
run: |
kind delete cluster --name ${{ steps.cluster.outputs.name }} || true
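
The "Run load tests" step above drives a `./loadtest run` CLI which, per commit 322c4bc130, is built with cobra. As a hedged illustration only (not the repository's actual cmd/loadtest code), a minimal cobra command wiring the flags the action passes (--old-image, --new-image, --scenario, --duration, --cluster-name, --skip-image-load, --skip-cluster) could look like this:

package main

import (
    "fmt"
    "os"

    "github.com/spf13/cobra"
)

// Hypothetical sketch of the flag surface used by the composite action above;
// the real implementation under test/loadtest may differ.
func main() {
    var (
        oldImage, newImage, scenario, clusterName string
        duration                                  int
        skipImageLoad, skipCluster                bool
    )
    run := &cobra.Command{
        Use:   "run",
        Short: "Run Reloader load-test scenarios",
        RunE: func(cmd *cobra.Command, args []string) error {
            // A real implementation would create or reuse the Kind cluster,
            // deploy the old/new images and execute the scenarios here.
            fmt.Printf("scenarios=%s old=%s new=%s duration=%ds cluster=%s\n",
                scenario, oldImage, newImage, duration, clusterName)
            return nil
        },
    }
    run.Flags().StringVar(&oldImage, "old-image", "", "pre-built image for the old version (optional, enables A/B comparison)")
    run.Flags().StringVar(&newImage, "new-image", "", "pre-built image for the new version")
    run.Flags().StringVar(&scenario, "scenario", "all", "comma-separated scenarios, e.g. S1,S4,S6")
    run.Flags().IntVar(&duration, "duration", 60, "test duration in seconds")
    run.Flags().StringVar(&clusterName, "cluster-name", "reloader-loadtest", "Kind cluster to use")
    run.Flags().BoolVar(&skipImageLoad, "skip-image-load", false, "assume images were already loaded into Kind")
    run.Flags().BoolVar(&skipCluster, "skip-cluster", false, "reuse an existing Kind cluster instead of creating one")

    root := &cobra.Command{Use: "loadtest"}
    root.AddCommand(run)
    if err := root.Execute(); err != nil {
        os.Exit(1)
    }
}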

.github/workflows/loadtest.yml

@@ -0,0 +1,112 @@
name: Load Test (Full)
on:
issue_comment:
types: [created]
permissions:
contents: read
pull-requests: write
issues: write
jobs:
loadtest:
# Only run on PR comments with /loadtest command
if: |
github.event.issue.pull_request &&
contains(github.event.comment.body, '/loadtest')
runs-on: ubuntu-latest
steps:
- name: Add reaction to comment
uses: actions/github-script@v7
with:
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: 'rocket'
});
- name: Get PR details
id: pr
uses: actions/github-script@v7
with:
script: |
const pr = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: context.issue.number
});
core.setOutput('head_ref', pr.data.head.ref);
core.setOutput('head_sha', pr.data.head.sha);
core.setOutput('base_ref', pr.data.base.ref);
core.setOutput('base_sha', pr.data.base.sha);
console.log(`PR #${context.issue.number}: ${pr.data.head.ref} -> ${pr.data.base.ref}`);
- name: Checkout PR branch
uses: actions/checkout@v4
with:
ref: ${{ steps.pr.outputs.head_sha }}
fetch-depth: 0 # Full history for building from base ref
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.26'
cache: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install kind
run: |
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.20.0/kind-linux-amd64
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind
- name: Install kubectl
run: |
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl
sudo mv kubectl /usr/local/bin/kubectl
- name: Run full A/B comparison load test
id: loadtest
uses: ./.github/actions/loadtest
with:
old-ref: ${{ steps.pr.outputs.base_sha }}
new-ref: ${{ steps.pr.outputs.head_sha }}
scenarios: 'all'
test-type: 'full'
post-comment: 'true'
pr-number: ${{ github.event.issue.number }}
comment-header: |
## Load Test Results (Full A/B Comparison)
**Comparing:** `${{ steps.pr.outputs.base_ref }}` → `${{ steps.pr.outputs.head_ref }}`
**Triggered by:** @${{ github.event.comment.user.login }}
- name: Add success reaction
if: steps.loadtest.outputs.status == 'pass'
uses: actions/github-script@v7
with:
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: '+1'
});
- name: Add failure reaction
if: steps.loadtest.outputs.status == 'fail'
uses: actions/github-script@v7
with:
script: |
await github.rest.reactions.createForIssueComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: '-1'
});


@@ -22,6 +22,7 @@ env:
KUBERNETES_VERSION: "1.30.0"
KIND_VERSION: "0.23.0"
REGISTRY: ghcr.io
RELOADER_EDITION: oss
jobs:
qa:
@@ -35,6 +36,8 @@ jobs:
permissions:
contents: read
pull-requests: write
issues: write
runs-on: ubuntu-latest
name: Build
@@ -109,6 +112,17 @@ jobs:
- name: Test
run: make test
- name: Run quick A/B load tests
uses: ./.github/actions/loadtest
with:
old-ref: ${{ github.event.pull_request.base.sha }}
# new-ref defaults to current checkout (PR branch)
scenarios: 'S1,S4,S6'
test-type: 'quick'
kind-cluster: 'kind' # Use the existing cluster created above
post-comment: 'true'
pr-number: ${{ github.event.pull_request.number }}
- name: Generate Tags
id: generate_tag
run: |
@@ -141,6 +155,7 @@ jobs:
VERSION=merge-${{ steps.generate_tag.outputs.GIT_TAG }}
COMMIT=${{github.event.pull_request.head.sha}}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
BUILD_PARAMETERS=${{ env.BUILD_PARAMETERS }}
cache-to: type=inline
@@ -160,6 +175,10 @@ jobs:
pull: true
push: false
build-args: |
VERSION=merge-${{ steps.generate_tag.outputs.GIT_UBI_TAG }}
COMMIT=${{github.event.pull_request.head.sha}}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
BUILD_PARAMETERS=${{ env.BUILD_PARAMETERS }}
BUILDER_IMAGE=${{ env.GHCR_IMAGE_REPOSITORY }}:${{ steps.highest_tag.outputs.tag }}
cache-to: type=inline


@@ -15,6 +15,7 @@ env:
KIND_VERSION: "0.23.0"
HELM_REGISTRY_URL: "https://stakater.github.io/stakater-charts"
REGISTRY: ghcr.io
RELOADER_EDITION: oss
jobs:
build:
@@ -103,7 +104,12 @@ jobs:
file: ${{ env.DOCKER_FILE_PATH }}
pull: true
push: true
build-args: BUILD_PARAMETERS=${{ env.BUILD_PARAMETERS }}
build-args: |
VERSION=merge-${{ github.event.number }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
BUILD_PARAMETERS=${{ env.BUILD_PARAMETERS }}
cache-to: type=inline
platforms: linux/amd64,linux/arm,linux/arm64
tags: |
@@ -152,6 +158,7 @@ jobs:
VERSION=merge-${{ github.event.number }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
BUILD_PARAMETERS=${{ env.BUILD_PARAMETERS }}
cache-to: type=inline
platforms: linux/amd64,linux/arm,linux/arm64


@@ -11,6 +11,7 @@ env:
KUBERNETES_VERSION: "1.30.0"
KIND_VERSION: "0.23.0"
REGISTRY: ghcr.io
RELOADER_EDITION: oss
jobs:
release:
@@ -110,6 +111,7 @@ jobs:
VERSION=${{ steps.generate_tag.outputs.RELEASE_VERSION }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
labels: |
org.opencontainers.image.source=${{ github.event.repository.clone_url }}
org.opencontainers.image.created=${{ steps.prep.outputs.created }}
@@ -160,6 +162,7 @@ jobs:
VERSION=${{ steps.generate_tag.outputs.RELEASE_VERSION }}
COMMIT=${{ github.sha }}
BUILD_DATE=${{ steps.prep.outputs.created }}
EDITION=${{ env.RELOADER_EDITION }}
labels: |
org.opencontainers.image.source=${{ github.event.repository.clone_url }}
org.opencontainers.image.created=${{ steps.prep.outputs.created }}

.gitignore

@@ -11,9 +11,14 @@ vendor
dist
Reloader
!**/chart/reloader
!**/internal/reloader
*.tgz
styles/
site/
/mkdocs.yml
yq
bin
bin
test/loadtest/results
test/loadtest/loadtest
# Temporary NFS files
.nfs*


@@ -2,7 +2,7 @@ ARG BUILDER_IMAGE
ARG BASE_IMAGE
# Build the manager binary
FROM --platform=${BUILDPLATFORM} ${BUILDER_IMAGE:-golang:1.25.5} AS builder
FROM --platform=${BUILDPLATFORM} ${BUILDER_IMAGE:-golang:1.26} AS builder
ARG TARGETOS
ARG TARGETARCH
@@ -12,6 +12,7 @@ ARG GOPRIVATE
ARG COMMIT
ARG VERSION
ARG BUILD_DATE
ARG EDITION=oss
WORKDIR /workspace
@@ -36,7 +37,8 @@ RUN CGO_ENABLED=0 \
GO111MODULE=on \
go build -ldflags="-s -w -X github.com/stakater/Reloader/pkg/common.Version=${VERSION} \
-X github.com/stakater/Reloader/pkg/common.Commit=${COMMIT} \
-X github.com/stakater/Reloader/pkg/common.BuildDate=${BUILD_DATE}" \
-X github.com/stakater/Reloader/pkg/common.BuildDate=${BUILD_DATE} \
-X github.com/stakater/Reloader/pkg/common.Edition=${EDITION}" \
-installsuffix 'static' -mod=mod -a -o manager ./
# Use distroless as minimal base image to package the manager binary
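
The -X flags above target variables in github.com/stakater/Reloader/pkg/common. As a rough sketch, assuming only what the ldflags show (the package path and variable names come from the build line; the default values here are illustrative assumptions, not the repository's actual zero values):

package common

// Build-time metadata injected via -ldflags "-X ..." in the Dockerfile above.
var (
    Version   = "dev"
    Commit    = "unknown"
    BuildDate = "unknown"
    Edition   = "oss" // set from the EDITION build arg (RELOADER_EDITION in CI)
)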


@@ -3,7 +3,7 @@ ARG BASE_IMAGE
FROM --platform=${BUILDPLATFORM} ${BUILDER_IMAGE} AS SRC
FROM ${BASE_IMAGE:-registry.access.redhat.com/ubi9/ubi:latest} AS ubi
FROM ${BASE_IMAGE:-registry.access.redhat.com/ubi9/ubi:9.7} AS ubi
ARG TARGETARCH
@@ -31,8 +31,10 @@ RUN while IFS= read -r file; do \
tar -chf /tmp/files.tar --exclude='etc/pki/entitlement-host*' -T /tmp/existing-files.txt 2>&1 | grep -vE "(File removed before we read it|Cannot stat)" || true; \
if [ -f /tmp/files.tar ]; then \
tar xf /tmp/files.tar -C /image/ 2>/dev/null || true; \
rm -f /tmp/files.tar; \
fi; \
fi
fi && \
rm -f /tmp/existing-files.txt
# Generate a rpm database which contains all the packages that you said were needed in ubi-build-files-*.txt
RUN rpm --root /image --initdb \


@@ -169,3 +169,43 @@ yq-install:
@curl -sL $(YQ_DOWNLOAD_URL) -o $(YQ_BIN)
@chmod +x $(YQ_BIN)
@echo "yq $(YQ_VERSION) installed at $(YQ_BIN)"
# =============================================================================
# Load Testing
# =============================================================================
LOADTEST_BIN = test/loadtest/loadtest
LOADTEST_OLD_IMAGE ?= localhost/reloader:old
LOADTEST_NEW_IMAGE ?= localhost/reloader:new
LOADTEST_DURATION ?= 60
LOADTEST_SCENARIOS ?= all
.PHONY: loadtest-build loadtest-quick loadtest-full loadtest loadtest-clean
loadtest-build: ## Build loadtest binary
cd test/loadtest && $(GOCMD) build -o loadtest ./cmd/loadtest
loadtest-quick: loadtest-build ## Run quick load tests (S1, S4, S6)
cd test/loadtest && ./loadtest run \
--old-image=$(LOADTEST_OLD_IMAGE) \
--new-image=$(LOADTEST_NEW_IMAGE) \
--scenario=S1,S4,S6 \
--duration=$(LOADTEST_DURATION)
loadtest-full: loadtest-build ## Run full load test suite
cd test/loadtest && ./loadtest run \
--old-image=$(LOADTEST_OLD_IMAGE) \
--new-image=$(LOADTEST_NEW_IMAGE) \
--scenario=all \
--duration=$(LOADTEST_DURATION)
loadtest: loadtest-build ## Run load tests with configurable scenarios (default: all)
cd test/loadtest && ./loadtest run \
--old-image=$(LOADTEST_OLD_IMAGE) \
--new-image=$(LOADTEST_NEW_IMAGE) \
--scenario=$(LOADTEST_SCENARIOS) \
--duration=$(LOADTEST_DURATION)
loadtest-clean: ## Clean loadtest binary and results
rm -f $(LOADTEST_BIN)
rm -rf test/loadtest/results


@@ -1 +1 @@
1.4.12
1.4.13


@@ -1,8 +1,8 @@
apiVersion: v1
name: reloader
description: Reloader chart that runs on kubernetes
version: 2.2.7
appVersion: v1.4.12
version: 2.2.8
appVersion: v1.4.13
keywords:
- Reloader
- kubernetes


@@ -19,7 +19,7 @@ fullnameOverride: ""
image:
name: stakater/reloader
repository: ghcr.io/stakater/reloader
tag: v1.4.12
tag: v1.4.13
# digest: sha256:1234567
pullPolicy: IfNotPresent
@@ -133,7 +133,7 @@ reloader:
labels:
provider: stakater
group: com.stakater.platform
version: v1.4.12
version: v1.4.13
# Support for extra environment variables.
env:
# Open supports Key value pair as environment variables.


@@ -17,7 +17,7 @@ spec:
app: reloader-reloader
spec:
containers:
- image: "ghcr.io/stakater/reloader:v1.4.12"
- image: "ghcr.io/stakater/reloader:v1.4.13"
imagePullPolicy: IfNotPresent
name: reloader-reloader
env:


@@ -141,7 +141,7 @@ spec:
fieldPath: metadata.namespace
- name: RELOADER_DEPLOYMENT_NAME
value: reloader-reloader
image: ghcr.io/stakater/reloader:v1.4.12
image: ghcr.io/stakater/reloader:v1.4.13
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5


@@ -76,7 +76,7 @@ Note: Rolling upgrade also works in the same way for secrets.
### Hash Value Computation
Reloader uses SHA512 to compute hash value. SHA1 is used because it is efficient and less prone to collision.
Reloader uses SHA1 to compute hash value. SHA1 is used because it is efficient and less prone to collision.
## Monitor All Namespaces
@@ -90,4 +90,4 @@ The output file can then be used to deploy Reloader in specific namespace.
## Compatibility With Helm Install and Upgrade
Reloader has no impact on helm deployment cycle. Reloader only injects an environment variable in `deployment`, `daemonset` or `statefulset`. The environment variable contains the SHA512 value of `ConfigMaps` or `Secrets` data. So if a deployment is created using Helm and Reloader updates the deployment, then next time you upgrade the helm release, Reloader will do nothing except changing that environment variable value in `deployment` , `daemonset` or `statefulset`.
Reloader has no impact on helm deployment cycle. Reloader only injects an environment variable in `deployment`, `daemonset` or `statefulset`. The environment variable contains the SHA1 value of `ConfigMaps` or `Secrets` data. So if a deployment is created using Helm and Reloader updates the deployment, then next time you upgrade the helm release, Reloader will do nothing except changing that environment variable value in `deployment` , `daemonset` or `statefulset`.


@@ -2,10 +2,10 @@
Reloader is inspired from [`configmapcontroller`](https://github.com/fabric8io/configmapcontroller) but there are many ways in which it differs from `configmapcontroller`. Below is the small comparison between these two controllers.
| Reloader | ConfigMap |
|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Reloader can watch both `Secrets` and `ConfigMaps`. | `configmapcontroller` can only watch changes in `ConfigMaps`. It cannot detect changes in other resources like `Secrets`. |
| Reloader can perform rolling upgrades on `deployments` as well as on `statefulsets` and `daemonsets` | `configmapcontroller` can only perform rolling upgrades on `deployments`. It currently does not support rolling upgrades on `statefulsets` and `daemonsets` |
| Reloader provides both unit test cases and end to end integration test cases for future updates. So one can make sure that new changes do not break any old functionality. | Currently there are not any unit test cases or end to end integration test cases in `configmap-controller`. It adds difficulties for any additional updates in `configmap-controller` and one can not know for sure whether new changes breaks any old functionality or not. |
| Reloader uses SHA512 to encode the change in `ConfigMap` or `Secret`. It then saves the SHA1 value in `STAKATER_FOO_CONFIGMAP` or `STAKATER_FOO_SECRET` environment variable depending upon where the change has happened. The use of SHA1 provides a concise 40 characters encoded value that is very less prone to collision. | `configmap-controller` uses `FABRICB_FOO_REVISION` environment variable to store any change in `ConfigMap` controller. It does not encode it or convert it in suitable hash value to avoid data pollution in deployment. |
| Reloader allows you to customize your own annotation (for both `Secrets` and `ConfigMaps`) using command line flags | `configmap-controller` restricts you to only their provided annotation |
| Reloader | ConfigMap |
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Reloader can watch both `Secrets` and `ConfigMaps`. | `configmapcontroller` can only watch changes in `ConfigMaps`. It cannot detect changes in other resources like `Secrets`. |
| Reloader can perform rolling upgrades on `deployments` as well as on `statefulsets` and `daemonsets` | `configmapcontroller` can only perform rolling upgrades on `deployments`. It currently does not support rolling upgrades on `statefulsets` and `daemonsets` |
| Reloader provides both unit test cases and end to end integration test cases for future updates. So one can make sure that new changes do not break any old functionality. | Currently there are not any unit test cases or end to end integration test cases in `configmap-controller`. It adds difficulties for any additional updates in `configmap-controller` and one can not know for sure whether new changes breaks any old functionality or not. |
| Reloader uses SHA1 to encode the change in `ConfigMap` or `Secret`. It then saves the SHA1 value in `STAKATER_FOO_CONFIGMAP` or `STAKATER_FOO_SECRET` environment variable depending upon where the change has happened. The use of SHA1 provides a concise 40 characters encoded value that is very less prone to collision. | `configmap-controller` uses `FABRICB_FOO_REVISION` environment variable to store any change in `ConfigMap` controller. It does not encode it or convert it in suitable hash value to avoid data pollution in deployment. |
| Reloader allows you to customize your own annotation (for both `Secrets` and `ConfigMaps`) using command line flags | `configmap-controller` restricts you to only their provided annotation |


@@ -6,7 +6,7 @@ Reloader and k8s-trigger-controller are both built for same purpose. So there ar
- Both controllers support change detection in `ConfigMaps` and `Secrets`
- Both controllers support deployment `rollout`
- Reloader controller use SHA512 for hashing
- Reloader controller use SHA1 for hashing
- Both controllers have end to end as well as unit test cases.
## Differences

go.mod

@@ -1,6 +1,6 @@
module github.com/stakater/Reloader
go 1.25.5
go 1.26
require (
github.com/argoproj/argo-rollouts v1.8.3
@@ -15,7 +15,6 @@ require (
k8s.io/apimachinery v0.35.0
k8s.io/client-go v0.35.0
k8s.io/kubectl v0.35.0
k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2
sigs.k8s.io/secrets-store-csi-driver v1.5.5
)
@@ -65,6 +64,7 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
k8s.io/utils v0.0.0-20251222233032-718f0e51e6d2 // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect


@@ -110,6 +110,7 @@ func NewController(
// Add function to add a new object to the queue in case of creating a resource
func (c *Controller) Add(obj interface{}) {
c.collectors.RecordEventReceived("add", c.resource)
switch object := obj.(type) {
case *v1.Namespace:
@@ -121,11 +122,14 @@ func (c *Controller) Add(obj interface{}) {
if options.ReloadOnCreate == "true" {
if !c.resourceInIgnoredNamespace(obj) && c.resourceInSelectedNamespaces(obj) && secretControllerInitialized && configmapControllerInitialized {
c.queue.Add(handler.ResourceCreatedHandler{
Resource: obj,
Collectors: c.collectors,
Recorder: c.recorder,
c.enqueue(handler.ResourceCreatedHandler{
Resource: obj,
Collectors: c.collectors,
Recorder: c.recorder,
EnqueueTime: time.Now(),
})
} else {
c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
}
@@ -181,34 +185,44 @@ func (c *Controller) removeSelectedNamespaceFromCache(namespace v1.Namespace) {
// Update function to add an old object and a new object to the queue in case of updating a resource
func (c *Controller) Update(old interface{}, new interface{}) {
c.collectors.RecordEventReceived("update", c.resource)
switch new.(type) {
case *v1.Namespace:
return
}
if !c.resourceInIgnoredNamespace(new) && c.resourceInSelectedNamespaces(new) {
c.queue.Add(handler.ResourceUpdatedHandler{
c.enqueue(handler.ResourceUpdatedHandler{
Resource: new,
OldResource: old,
Collectors: c.collectors,
Recorder: c.recorder,
EnqueueTime: time.Now(),
})
} else {
c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
// Delete function to add an object to the queue in case of deleting a resource
func (c *Controller) Delete(old interface{}) {
c.collectors.RecordEventReceived("delete", c.resource)
if _, ok := old.(*csiv1.SecretProviderClassPodStatus); ok {
return
}
if options.ReloadOnDelete == "true" {
if !c.resourceInIgnoredNamespace(old) && c.resourceInSelectedNamespaces(old) && secretControllerInitialized && configmapControllerInitialized {
c.queue.Add(handler.ResourceDeleteHandler{
Resource: old,
Collectors: c.collectors,
Recorder: c.recorder,
c.enqueue(handler.ResourceDeleteHandler{
Resource: old,
Collectors: c.collectors,
Recorder: c.recorder,
EnqueueTime: time.Now(),
})
} else {
c.collectors.RecordSkipped("ignored_or_not_selected")
}
}
@@ -219,6 +233,13 @@ func (c *Controller) Delete(old interface{}) {
}
}
// enqueue adds an item to the queue and records metrics
func (c *Controller) enqueue(item interface{}) {
c.queue.Add(item)
c.collectors.RecordQueueAdd()
c.collectors.SetQueueDepth(c.queue.Len())
}
// Run function for controller which handles the queue
func (c *Controller) Run(threadiness int, stopCh chan struct{}) {
defer runtime.HandleCrash()
@@ -260,13 +281,34 @@ func (c *Controller) processNextItem() bool {
if quit {
return false
}
c.collectors.SetQueueDepth(c.queue.Len())
// Tell the queue that we are done with processing this key. This unblocks the key for other workers
// This allows safe parallel processing because two events with the same key are never processed in
// parallel.
defer c.queue.Done(resourceHandler)
// Record queue latency if the handler supports it
if h, ok := resourceHandler.(handler.TimedHandler); ok {
queueLatency := time.Since(h.GetEnqueueTime())
c.collectors.RecordQueueLatency(queueLatency)
}
// Track reconcile/handler duration
startTime := time.Now()
// Invoke the method containing the business logic
err := resourceHandler.(handler.ResourceHandler).Handle()
duration := time.Since(startTime)
if err != nil {
c.collectors.RecordReconcile("error", duration)
} else {
c.collectors.RecordReconcile("success", duration)
}
// Handle the error if something went wrong during the execution of the business logic
c.handleErr(err, resourceHandler)
return true
@@ -279,16 +321,26 @@ func (c *Controller) handleErr(err error, key interface{}) {
// This ensures that future processing of updates for this key is not delayed because of
// an outdated error history.
c.queue.Forget(key)
// Record successful event processing
c.collectors.RecordEventProcessed("unknown", c.resource, "success")
return
}
// Record error
c.collectors.RecordError("handler_error")
// This controller retries 5 times if something goes wrong. After that, it stops trying.
if c.queue.NumRequeues(key) < 5 {
logrus.Errorf("Error syncing events: %v", err)
// Record retry
c.collectors.RecordRetry()
// Re-enqueue the key rate limited. Based on the rate limiter on the
// queue and the re-enqueue history, the key will be processed later again.
c.queue.AddRateLimited(key)
c.collectors.SetQueueDepth(c.queue.Len())
return
}
@@ -297,6 +349,8 @@ func (c *Controller) handleErr(err error, key interface{}) {
runtime.HandleError(err)
logrus.Errorf("Dropping key out of the queue: %v", err)
logrus.Debugf("Dropping the key %q out of the queue: %v", key, err)
c.collectors.RecordEventProcessed("unknown", c.resource, "dropped")
}
func getClientForResource(resource string, coreClient kubernetes.Interface) (cache.Getter, error) {
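
For reference, the collector calls added to this controller file can be read as an interface. The sketch below is inferred purely from the call sites in the hunks above; the concrete metrics.Collectors type in internal/pkg/metrics may be shaped differently.

package metricsdoc // hypothetical package, for illustration only

import "time"

// queueCollector summarizes, from the call sites above, the metric hooks the
// controller now invokes; parameter names and types are inferred, not quoted.
type queueCollector interface {
    RecordEventReceived(event, resource string)
    RecordEventProcessed(event, resource, outcome string)
    RecordSkipped(reason string)
    RecordQueueAdd()
    SetQueueDepth(depth int)
    RecordQueueLatency(d time.Duration)
    RecordReconcile(result string, d time.Duration)
    RecordError(kind string)
    RecordRetry()
}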


@@ -2582,19 +2582,21 @@ func TestController_resourceInIgnoredNamespace(t *testing.T) {
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := &Controller{
client: tt.fields.client,
indexer: tt.fields.indexer,
queue: tt.fields.queue,
informer: tt.fields.informer,
namespace: tt.fields.namespace,
ignoredNamespaces: tt.fields.ignoredNamespaces,
}
if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
}
})
t.Run(
tt.name, func(t *testing.T) {
c := &Controller{
client: tt.fields.client,
indexer: tt.fields.indexer,
queue: tt.fields.queue,
informer: tt.fields.informer,
namespace: tt.fields.namespace,
ignoredNamespaces: tt.fields.ignoredNamespaces,
}
if got := c.resourceInIgnoredNamespace(tt.args.raw); got != tt.want {
t.Errorf("Controller.resourceInIgnoredNamespace() = %v, want %v", got, tt.want)
}
},
)
}
}
@@ -2756,35 +2758,37 @@ func TestController_resourceInNamespaceSelector(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fakeClient := fake.NewClientset()
namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)
t.Run(
tt.name, func(t *testing.T) {
fakeClient := fake.NewClientset()
namespace, _ := fakeClient.CoreV1().Namespaces().Create(context.Background(), &tt.fields.namespace, metav1.CreateOptions{})
logrus.Infof("created fakeClient namespace for testing = %s", namespace.Name)
c := &Controller{
client: fakeClient,
indexer: tt.fields.indexer,
queue: tt.fields.queue,
informer: tt.fields.informer,
namespace: tt.fields.namespace.Name,
namespaceSelector: tt.fields.namespaceSelector,
}
c := &Controller{
client: fakeClient,
indexer: tt.fields.indexer,
queue: tt.fields.queue,
informer: tt.fields.informer,
namespace: tt.fields.namespace.Name,
namespaceSelector: tt.fields.namespaceSelector,
}
listOptions := metav1.ListOptions{}
listOptions.LabelSelector = tt.fields.namespaceSelector
namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)
listOptions := metav1.ListOptions{}
listOptions.LabelSelector = tt.fields.namespaceSelector
namespaces, _ := fakeClient.CoreV1().Namespaces().List(context.Background(), listOptions)
for _, ns := range namespaces.Items {
c.addSelectedNamespaceToCache(ns)
}
for _, ns := range namespaces.Items {
c.addSelectedNamespaceToCache(ns)
}
if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
}
if got := c.resourceInSelectedNamespaces(tt.args.raw); got != tt.want {
t.Errorf("Controller.resourceInNamespaceSelector() = %v, want %v", got, tt.want)
}
for _, ns := range namespaces.Items {
c.removeSelectedNamespaceFromCache(ns)
}
})
for _, ns := range namespaces.Items {
c.removeSelectedNamespaceFromCache(ns)
}
},
)
}
}


@@ -1,14 +1,20 @@
package crypto
import (
"crypto/sha512"
"encoding/hex"
"crypto/sha1"
"fmt"
"io"
"github.com/sirupsen/logrus"
)
// GenerateSHA generates SHA from string
// Always returns a hash value, even for empty strings, to ensure consistent behavior
// and avoid issues with string matching operations (e.g., strings.Contains(str, "") always returns true)
func GenerateSHA(data string) string {
hash := sha512.Sum512_256([]byte(data))
return hex.EncodeToString(hash[:])
hasher := sha1.New()
_, err := io.WriteString(hasher, data)
if err != nil {
logrus.Errorf("Unable to write data in hash writer %v", err)
}
sha := hasher.Sum(nil)
return fmt.Sprintf("%x", sha)
}


@@ -7,7 +7,7 @@ import (
// TestGenerateSHA generates the sha from given data and verifies whether it is correct or not
func TestGenerateSHA(t *testing.T) {
data := "www.stakater.com"
sha := "2e9aa975331b22861b4f62b7fcc69b63e001f938361fee3b4ed888adf26a10e3"
sha := "abd4ed82fb04548388a6cf3c339fd9dc84d275df"
result := GenerateSHA(data)
if result != sha {
t.Errorf("Failed to generate SHA")
@@ -18,11 +18,11 @@ func TestGenerateSHA(t *testing.T) {
// This ensures consistent behavior and avoids issues with string matching operations
func TestGenerateSHAEmptyString(t *testing.T) {
result := GenerateSHA("")
expected := "c672b8d1ef56ed28ab87c3622c5114069bdd3ad7b8f9737498d0c01ecef0967a"
expected := "da39a3ee5e6b4b0d3255bfef95601890afd80709"
if result != expected {
t.Errorf("Failed to generate SHA for empty string. Expected: %s, Got: %s", expected, result)
}
if len(result) != 64 {
t.Errorf("SHA hash should be 64 characters long, got %d", len(result))
if len(result) != 40 {
t.Errorf("SHA hash should be 40 characters long, got %d", len(result))
}
}
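
A standalone illustration of the hash change above, using only the standard library: SHA1 yields a 20-byte digest (40 hex characters, versus 64 for the previous SHA-512/256 variant), and the empty-string digest is the well-known da39a3ee... value asserted in the test.

package main

import (
    "crypto/sha1"
    "fmt"
)

func main() {
    // Same input as TestGenerateSHA above.
    sum := sha1.Sum([]byte("www.stakater.com"))
    fmt.Printf("%x\n", sum) // expected per the test: abd4ed82fb04548388a6cf3c339fd9dc84d275df

    // SHA1 of the empty string, as asserted by TestGenerateSHAEmptyString.
    empty := fmt.Sprintf("%x", sha1.Sum(nil))
    fmt.Println(empty, len(empty)) // da39a3ee5e6b4b0d3255bfef95601890afd80709 40
}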


@@ -1,6 +1,8 @@
package handler
import (
"time"
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/metrics"
"github.com/stakater/Reloader/internal/pkg/options"
@@ -11,25 +13,46 @@ import (
// ResourceCreatedHandler contains new objects
type ResourceCreatedHandler struct {
Resource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
Resource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
EnqueueTime time.Time // Time when this handler was added to the queue
}
// GetEnqueueTime returns when this handler was enqueued
func (r ResourceCreatedHandler) GetEnqueueTime() time.Time {
return r.EnqueueTime
}
// Handle processes the newly created resource
func (r ResourceCreatedHandler) Handle() error {
startTime := time.Now()
result := "error"
defer func() {
r.Collectors.RecordReconcile(result, time.Since(startTime))
}()
if r.Resource == nil {
logrus.Errorf("Resource creation handler received nil resource")
} else {
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
return sendUpgradeWebhook(config, options.WebhookUrl)
}
// process resource based on its type
return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
return nil
}
return nil
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
err := sendUpgradeWebhook(config, options.WebhookUrl)
if err == nil {
result = "success"
}
return err
}
// process resource based on its type
err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
if err == nil {
result = "success"
}
return err
}
// GetConfig gets configurations containing SHA, annotations, namespace and resource name
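
Commit 16ff7f6ac9 defaults the reconcile metric result to "error" for panic safety: the deferred RecordReconcile call fires even if Handle panics or returns early, and only the happy path flips the result to "success". A minimal, self-contained sketch of that pattern, using a stand-in recorder rather than the real metrics.Collectors:

package main

import (
    "errors"
    "fmt"
    "time"
)

// recordReconcile stands in for r.Collectors.RecordReconcile above.
func recordReconcile(result string, d time.Duration) {
    fmt.Printf("reconcile result=%s duration=%s\n", result, d)
}

// handle mirrors the handlers above: result starts as "error" so a panic or
// an error return is still recorded; it becomes "success" only at the end.
func handle(do func() error) error {
    start := time.Now()
    result := "error"
    defer func() { recordReconcile(result, time.Since(start)) }()

    if err := do(); err != nil {
        return err
    }
    result = "success"
    return nil
}

func main() {
    _ = handle(func() error { return nil })                // records "success"
    _ = handle(func() error { return errors.New("boom") }) // records "error"
}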


@@ -3,6 +3,7 @@ package handler
import (
"fmt"
"slices"
"time"
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/callbacks"
@@ -20,25 +21,46 @@ import (
// ResourceDeleteHandler contains new objects
type ResourceDeleteHandler struct {
Resource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
Resource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
EnqueueTime time.Time // Time when this handler was added to the queue
}
// GetEnqueueTime returns when this handler was enqueued
func (r ResourceDeleteHandler) GetEnqueueTime() time.Time {
return r.EnqueueTime
}
// Handle processes resources being deleted
func (r ResourceDeleteHandler) Handle() error {
startTime := time.Now()
result := "error"
defer func() {
r.Collectors.RecordReconcile(result, time.Since(startTime))
}()
if r.Resource == nil {
logrus.Errorf("Resource delete handler received nil resource")
} else {
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
return sendUpgradeWebhook(config, options.WebhookUrl)
}
// process resource based on its type
return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
return nil
}
return nil
config, _ := r.GetConfig()
// Send webhook
if options.WebhookUrl != "" {
err := sendUpgradeWebhook(config, options.WebhookUrl)
if err == nil {
result = "success"
}
return err
}
// process resource based on its type
err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeDeleteStrategy)
if err == nil {
result = "success"
}
return err
}
// GetConfig gets configurations containing SHA, annotations, namespace and resource name


@@ -1,9 +1,18 @@
package handler
import "github.com/stakater/Reloader/pkg/common"
import (
"time"
"github.com/stakater/Reloader/pkg/common"
)
// ResourceHandler handles the creation and update of resources
type ResourceHandler interface {
Handle() error
GetConfig() (common.Config, string)
}
// TimedHandler is a handler that tracks when it was enqueued
type TimedHandler interface {
GetEnqueueTime() time.Time
}


@@ -1,6 +1,8 @@
package handler
import (
"time"
"github.com/sirupsen/logrus"
"github.com/stakater/Reloader/internal/pkg/metrics"
"github.com/stakater/Reloader/internal/pkg/options"
@@ -17,23 +19,49 @@ type ResourceUpdatedHandler struct {
OldResource interface{}
Collectors metrics.Collectors
Recorder record.EventRecorder
EnqueueTime time.Time // Time when this handler was added to the queue
}
// GetEnqueueTime returns when this handler was enqueued
func (r ResourceUpdatedHandler) GetEnqueueTime() time.Time {
return r.EnqueueTime
}
// Handle processes the updated resource
func (r ResourceUpdatedHandler) Handle() error {
startTime := time.Now()
result := "error"
defer func() {
r.Collectors.RecordReconcile(result, time.Since(startTime))
}()
if r.Resource == nil || r.OldResource == nil {
logrus.Errorf("Resource update handler received nil resource")
} else {
config, oldSHAData := r.GetConfig()
if config.SHAValue != oldSHAData {
// Send a webhook if update
if options.WebhookUrl != "" {
return sendUpgradeWebhook(config, options.WebhookUrl)
}
// process resource based on its type
return doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
}
return nil
}
config, oldSHAData := r.GetConfig()
if config.SHAValue != oldSHAData {
// Send a webhook if update
if options.WebhookUrl != "" {
err := sendUpgradeWebhook(config, options.WebhookUrl)
if err == nil {
result = "success"
}
return err
}
// process resource based on its type
err := doRollingUpgrade(config, r.Collectors, r.Recorder, invokeReloadStrategy)
if err == nil {
result = "success"
}
return err
}
// No data change - skip
result = "skipped"
r.Collectors.RecordSkipped("no_data_change")
return nil
}


@@ -9,6 +9,7 @@ import (
"io"
"os"
"strings"
"time"
"github.com/parnurzeal/gorequest"
"github.com/prometheus/client_golang/prometheus"
@@ -239,23 +240,35 @@ func rollingUpgrade(clients kube.Clients, config common.Config, upgradeFuncs cal
func PerformAction(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy) error {
items := upgradeFuncs.ItemsFunc(clients, config.Namespace)
// Record workloads scanned
collectors.RecordWorkloadsScanned(upgradeFuncs.ResourceType, len(items))
matchedCount := 0
for _, item := range items {
err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) error {
matched, err := retryOnConflict(retry.DefaultRetry, func(fetchResource bool) (bool, error) {
return upgradeResource(clients, config, upgradeFuncs, collectors, recorder, strategy, item, fetchResource)
})
if err != nil {
return err
}
if matched {
matchedCount++
}
}
// Record workloads matched
collectors.RecordWorkloadsMatched(upgradeFuncs.ResourceType, matchedCount)
return nil
}
func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
func retryOnConflict(backoff wait.Backoff, fn func(_ bool) (bool, error)) (bool, error) {
var lastError error
var matched bool
fetchResource := false // do not fetch resource on first attempt, already done by ItemsFunc
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
err := fn(fetchResource)
var err error
matched, err = fn(fetchResource)
fetchResource = true
switch {
case err == nil:
@@ -270,20 +283,22 @@ func retryOnConflict(backoff wait.Backoff, fn func(_ bool) error) error {
if wait.Interrupted(err) {
err = lastError
}
return err
return matched, err
}
func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) error {
func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, recorder record.EventRecorder, strategy invokeStrategy, resource runtime.Object, fetchResource bool) (bool, error) {
actionStartTime := time.Now()
accessor, err := meta.Accessor(resource)
if err != nil {
return err
return false, err
}
resourceName := accessor.GetName()
if fetchResource {
resource, err = upgradeFuncs.ItemFunc(clients, resourceName, config.Namespace)
if err != nil {
return err
return false, err
}
}
if config.Type == constants.SecretProviderClassEnvVarPostfix {
@@ -296,13 +311,14 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
if !result.ShouldReload {
logrus.Debugf("No changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
return nil
return false, nil
}
strategyResult := strategy(upgradeFuncs, resource, config, result.AutoReload)
if strategyResult.Result != constants.Updated {
return nil
collectors.RecordSkipped("strategy_not_updated")
return false, nil
}
// find correct annotation and update the resource
@@ -316,7 +332,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
_, err = PauseDeployment(deployment, clients, config.Namespace, pauseInterval)
if err != nil {
logrus.Errorf("Failed to pause deployment '%s' in namespace '%s': %v", resourceName, config.Namespace, err)
return err
return true, err
}
}
}
@@ -327,16 +343,19 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
err = upgradeFuncs.UpdateFunc(clients, config.Namespace, resource)
}
actionLatency := time.Since(actionStartTime)
if err != nil {
message := fmt.Sprintf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)
logrus.Errorf("Update for '%s' of type '%s' in namespace '%s' failed with error %v", resourceName, upgradeFuncs.ResourceType, config.Namespace, err)
collectors.Reloaded.With(prometheus.Labels{"success": "false"}).Inc()
collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "false", "namespace": config.Namespace}).Inc()
collectors.RecordAction(upgradeFuncs.ResourceType, "error", actionLatency)
if recorder != nil {
recorder.Event(resource, v1.EventTypeWarning, "ReloadFail", message)
}
return err
return true, err
} else {
message := fmt.Sprintf("Changes detected in '%s' of type '%s' in namespace '%s'", config.ResourceName, config.Type, config.Namespace)
message += fmt.Sprintf(", Updated '%s' of type '%s' in namespace '%s'", resourceName, upgradeFuncs.ResourceType, config.Namespace)
@@ -345,6 +364,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
collectors.Reloaded.With(prometheus.Labels{"success": "true"}).Inc()
collectors.ReloadedByNamespace.With(prometheus.Labels{"success": "true", "namespace": config.Namespace}).Inc()
collectors.RecordAction(upgradeFuncs.ResourceType, "success", actionLatency)
alert_on_reload, ok := os.LookupEnv("ALERT_ON_RELOAD")
if recorder != nil {
recorder.Event(resource, v1.EventTypeNormal, "Reloaded", message)
@@ -357,7 +377,7 @@ func upgradeResource(clients kube.Clients, config common.Config, upgradeFuncs ca
}
}
return nil
return true, nil
}
func getVolumeMountName(volumes []v1.Volume, mountType string, volumeName string) string {


@@ -108,9 +108,6 @@ var (
)
func TestMain(m *testing.M) {
// Skipping test sleep because fake clients are synchronous and don't need delays
// This significantly speeds up test execution (saves ~3-5 seconds per resource creation)
testutil.SkipTestSleeps = true
// Creating namespaces
testutil.CreateNamespace(arsNamespace, clients.KubernetesClient)
@@ -1873,7 +1870,7 @@ var labelFailed = prometheus.Labels{"success": "false"}
func testRollingUpgradeInvokeDeleteStrategyArs(t *testing.T, clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, envVarPostfix string) {
err := PerformAction(clients, config, upgradeFuncs, collectors, nil, invokeDeleteStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for %s with %s", upgradeFuncs.ResourceType, envVarPostfix)
}
@@ -1927,7 +1924,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapUsingArs(t *testing.T) {
}
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap")
}
@@ -1984,7 +1981,7 @@ func TestRollingUpgradeForDeploymentWithPatchAndRetryUsingArs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"metadata":{"annotations":{"reloader.stakater.com/last-reloaded-from":`)
assert.Contains(t, string(bytes), `\"hash\":\"fd9e71a362056bfa864d9859e12978f893d330ce8cbf09218b25d015770ad91f\"`)
assert.Contains(t, string(bytes), `\"hash\":\"3c9a892aeaedc759abc3df9884a37b8be5680382\"`)
return nil
}
@@ -2017,7 +2014,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapWithoutReloadAnnotationAndWitho
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap")
}
@@ -2049,7 +2046,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapWithoutReloadAnnotationButWithA
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap")
}
@@ -2151,7 +2148,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapViaSearchAnnotationNoTriggersUs
logrus.Infof("Verifying deployment update")
updated := testutil.VerifyResourceAnnotationUpdate(clients, config, deploymentFuncs)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if updated {
t.Errorf("Deployment was updated unexpectedly")
}
@@ -2219,7 +2216,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapInInitContainerUsingArs(t *test
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap")
}
@@ -2251,7 +2248,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapInProjectVolumeInInitContainerU
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap in projected volume")
}
@@ -2283,7 +2280,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarUsingArs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -2315,7 +2312,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarInInitContainerUsingArs
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -2347,7 +2344,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarFromUsingArs(t *testing
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -2379,7 +2376,7 @@ func TestRollingUpgradeForDeploymentWithSecretUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2411,7 +2408,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassUsingArs(t *testing.T
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -2443,7 +2440,7 @@ func TestRollingUpgradeForDeploymentWithSecretInProjectedVolumeUsingArs(t *testi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret in projected volume")
}
@@ -2475,7 +2472,7 @@ func TestRollingUpgradeForDeploymentWithSecretinInitContainerUsingArs(t *testing
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2507,7 +2504,7 @@ func TestRollingUpgradeForDeploymentWithSecretproviderclassInInitContainerUsingA
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -2539,7 +2536,7 @@ func TestRollingUpgradeForDeploymentWithSecretInProjectedVolumeinInitContainerUs
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret in projected volume")
}
@@ -2571,7 +2568,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2603,7 +2600,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarFromUsingArs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2634,7 +2631,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarInInitContainerUsingArs(t
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2708,7 +2705,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithSameConfi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with same config")
}
@@ -2721,7 +2718,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithSameConfi
logrus.Infof("Performing reload using same config")
err = PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Second rolling upgrade failed for Deployment with same config")
}
@@ -2743,7 +2740,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithDifferent
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with different config")
}
@@ -2759,7 +2756,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithDifferent
config.SHAValue = shaData
err = PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Second rolling upgrade failed for Deployment with different config")
}
@@ -2781,7 +2778,7 @@ func TestRollingUpgradeForDeploymentWithSecretAutoAnnotationUsingArs(t *testing.
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -2813,7 +2810,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassAutoAnnotationUsingAr
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -2866,7 +2863,7 @@ func TestRollingUpgradeForDeploymentWithConfigMapAutoAnnotationUsingArs(t *testi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with ConfigMap")
}
@@ -2910,7 +2907,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapUsingArs(t *testing.T) {
}
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap")
}
@@ -2967,7 +2964,7 @@ func TestRollingUpgradeForDaemonSetWithPatchAndRetryUsingArs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"metadata":{"annotations":{"reloader.stakater.com/last-reloaded-from":`)
assert.Contains(t, string(bytes), `\"hash\":\"43bf9e30e7c4e32a8f8673c462b86d0b1ac626cf498afdc0d0108e79ebe7ee0c\"`)
assert.Contains(t, string(bytes), `\"hash\":\"314a2269170750a974d79f02b5b9ee517de7f280\"`)
return nil
}
@@ -3001,7 +2998,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapInProjectedVolumeUsingArs(t *tes
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap in projected volume")
}
@@ -3033,7 +3030,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapAsEnvVarUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap used as env var")
}
@@ -3065,7 +3062,7 @@ func TestRollingUpgradeForDaemonSetWithSecretUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with secret")
}
@@ -3097,7 +3094,7 @@ func TestRollingUpgradeForDaemonSetWithSecretProviderClassUsingArs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with SecretProviderClass")
}
@@ -3129,7 +3126,7 @@ func TestRollingUpgradeForDaemonSetWithSecretInProjectedVolumeUsingArs(t *testin
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with secret in projected volume")
}
@@ -3173,7 +3170,7 @@ func TestRollingUpgradeForStatefulSetWithConfigmapUsingArs(t *testing.T) {
}
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with configmap")
}
@@ -3230,7 +3227,7 @@ func TestRollingUpgradeForStatefulSetWithPatchAndRetryUsingArs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"metadata":{"annotations":{"reloader.stakater.com/last-reloaded-from":`)
assert.Contains(t, string(bytes), `\"hash\":\"6aa837180bdf6a93306c71a0cf62b4a45c2d5b021578247b3b64d5baea2b84d9\"`)
assert.Contains(t, string(bytes), `\"hash\":\"f821414d40d8815fb330763f74a4ff7ab651d4fa\"`)
return nil
}
@@ -3264,7 +3261,7 @@ func TestRollingUpgradeForStatefulSetWithConfigmapInProjectedVolumeUsingArs(t *t
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with configmap in projected volume")
}
@@ -3296,7 +3293,7 @@ func TestRollingUpgradeForStatefulSetWithSecretUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with secret")
}
@@ -3328,7 +3325,7 @@ func TestRollingUpgradeForStatefulSetWithSecretProviderClassUsingArs(t *testing.
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with SecretProviderClass: %v", err)
}
@@ -3360,7 +3357,7 @@ func TestRollingUpgradeForStatefulSetWithSecretInProjectedVolumeUsingArs(t *test
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with secret in projected volume")
}
@@ -3392,7 +3389,7 @@ func TestRollingUpgradeForDeploymentWithPodAnnotationsUsingArs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with pod annotations")
}
@@ -3525,7 +3522,7 @@ func TestIgnoreAnnotationNoReloadUsingErs(t *testing.T) {
func testRollingUpgradeInvokeDeleteStrategyErs(t *testing.T, clients kube.Clients, config common.Config, upgradeFuncs callbacks.RollingUpgradeFuncs, collectors metrics.Collectors, envVarPostfix string) {
err := PerformAction(clients, config, upgradeFuncs, collectors, nil, invokeDeleteStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for %s with %s", upgradeFuncs.ResourceType, envVarPostfix)
}
@@ -3568,7 +3565,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for %s with %s", deploymentFuncs.ResourceType, envVarPostfix)
}
@@ -3610,7 +3607,7 @@ func TestRollingUpgradeForDeploymentWithPatchAndRetryUsingErs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"spec":{"containers":[{"name":`)
assert.Contains(t, string(bytes), `"value":"fd9e71a362056bfa864d9859e12978f893d330ce8cbf09218b25d015770ad91f"`)
assert.Contains(t, string(bytes), `"value":"3c9a892aeaedc759abc3df9884a37b8be5680382"`)
return nil
}
@@ -3712,7 +3709,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapViaSearchAnnotationNoTriggersUs
logrus.Infof("Verifying deployment update")
updated := testutil.VerifyResourceEnvVarUpdate(clients, config, envVarPostfix, deploymentFuncs)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if updated {
t.Errorf("Deployment was updated unexpectedly")
}
@@ -3780,7 +3777,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapInInitContainerUsingErs(t *test
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for %s with %s", deploymentFuncs.ResourceType, envVarPostfix)
}
@@ -3812,7 +3809,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapInProjectVolumeInInitContainerU
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap in projected volume")
}
@@ -3844,7 +3841,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarUsingErs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -3876,7 +3873,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarInInitContainerUsingErs
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -3908,7 +3905,7 @@ func TestRollingUpgradeForDeploymentWithConfigmapAsEnvVarFromUsingErs(t *testing
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Configmap used as env var")
}
@@ -3940,7 +3937,7 @@ func TestRollingUpgradeForDeploymentWithSecretUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -3972,7 +3969,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassUsingErs(t *testing.T
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -4004,7 +4001,7 @@ func TestRollingUpgradeForDeploymentWithSecretInProjectedVolumeUsingErs(t *testi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret in projected volume")
}
@@ -4036,7 +4033,7 @@ func TestRollingUpgradeForDeploymentWithSecretinInitContainerUsingErs(t *testing
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -4068,7 +4065,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassinInitContainerUsingE
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -4100,7 +4097,7 @@ func TestRollingUpgradeForDeploymentWithSecretInProjectedVolumeinInitContainerUs
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret in projected volume")
}
@@ -4132,7 +4129,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -4164,7 +4161,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarFromUsingErs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -4196,7 +4193,7 @@ func TestRollingUpgradeForDeploymentWithSecretAsEnvVarInInitContainerUsingErs(t
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -4228,7 +4225,7 @@ func TestRollingUpgradeForDeploymentWithSecretExcludeAnnotationUsingErs(t *testi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with exclude Secret")
}
@@ -4250,7 +4247,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassExcludeAnnotationUsin
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with exclude SecretProviderClass")
}
@@ -4272,7 +4269,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithSameConfi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with same config")
}
@@ -4285,7 +4282,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithSameConfi
logrus.Infof("Performing reload using same config")
err = PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Second rolling upgrade failed for Deployment with same config")
}
@@ -4307,7 +4304,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithDifferent
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with different config")
}
@@ -4323,7 +4320,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassReloadedWithDifferent
config.SHAValue = shaData
err = PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Second rolling upgrade failed for Deployment with different config")
}
@@ -4345,7 +4342,7 @@ func TestRollingUpgradeForDeploymentWithSecretAutoAnnotationUsingErs(t *testing.
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with Secret")
}
@@ -4377,7 +4374,7 @@ func TestRollingUpgradeForDeploymentWithSecretProviderClassAutoAnnotationUsingEr
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with SecretProviderClass")
}
@@ -4409,7 +4406,7 @@ func TestRollingUpgradeForDeploymentWithConfigMapExcludeAnnotationUsingErs(t *te
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with exclude ConfigMap")
}
@@ -4431,7 +4428,7 @@ func TestRollingUpgradeForDeploymentWithConfigMapAutoAnnotationUsingErs(t *testi
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with ConfigMap")
}
@@ -4463,7 +4460,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap")
}
@@ -4505,7 +4502,7 @@ func TestRollingUpgradeForDaemonSetWithPatchAndRetryUsingErs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"spec":{"containers":[{"name":`)
assert.Contains(t, string(bytes), `"value":"43bf9e30e7c4e32a8f8673c462b86d0b1ac626cf498afdc0d0108e79ebe7ee0c"`)
assert.Contains(t, string(bytes), `"value":"314a2269170750a974d79f02b5b9ee517de7f280"`)
return nil
}
@@ -4517,7 +4514,7 @@ func TestRollingUpgradeForDaemonSetWithPatchAndRetryUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap")
}
@@ -4538,7 +4535,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapInProjectedVolumeUsingErs(t *tes
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap in projected volume")
}
@@ -4570,7 +4567,7 @@ func TestRollingUpgradeForDaemonSetWithConfigmapAsEnvVarUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with configmap used as env var")
}
@@ -4602,7 +4599,7 @@ func TestRollingUpgradeForDaemonSetWithSecretUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with secret")
}
@@ -4634,7 +4631,7 @@ func TestRollingUpgradeForDaemonSetWithSecretProviderClassUsingErs(t *testing.T)
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with SecretProviderClass")
}
@@ -4666,7 +4663,7 @@ func TestRollingUpgradeForDaemonSetWithSecretInProjectedVolumeUsingErs(t *testin
collectors := getCollectors()
err := PerformAction(clients, config, daemonSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for DaemonSet with secret in projected volume")
}
@@ -4698,7 +4695,7 @@ func TestRollingUpgradeForStatefulSetWithConfigmapUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with configmap")
}
@@ -4740,7 +4737,7 @@ func TestRollingUpgradeForStatefulSetWithPatchAndRetryUsingErs(t *testing.T) {
assert.Equal(t, patchtypes.StrategicMergePatchType, patchType)
assert.NotEmpty(t, bytes)
assert.Contains(t, string(bytes), `{"spec":{"template":{"spec":{"containers":[{"name":`)
assert.Contains(t, string(bytes), `"value":"6aa837180bdf6a93306c71a0cf62b4a45c2d5b021578247b3b64d5baea2b84d9"`)
assert.Contains(t, string(bytes), `"value":"f821414d40d8815fb330763f74a4ff7ab651d4fa"`)
return nil
}
@@ -4752,7 +4749,7 @@ func TestRollingUpgradeForStatefulSetWithPatchAndRetryUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with configmap")
}
@@ -4773,7 +4770,7 @@ func TestRollingUpgradeForStatefulSetWithConfigmapInProjectedVolumeUsingErs(t *t
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with configmap in projected volume")
}
@@ -4805,7 +4802,7 @@ func TestRollingUpgradeForStatefulSetWithSecretUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with secret")
}
@@ -4837,7 +4834,7 @@ func TestRollingUpgradeForStatefulSetWithSecretProviderClassUsingErs(t *testing.
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with SecretProviderClass")
}
@@ -4869,7 +4866,7 @@ func TestRollingUpgradeForStatefulSetWithSecretInProjectedVolumeUsingErs(t *test
collectors := getCollectors()
err := PerformAction(clients, config, statefulSetFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for StatefulSet with secret in projected volume")
}
@@ -4901,7 +4898,7 @@ func TestRollingUpgradeForDeploymentWithPodAnnotationsUsingErs(t *testing.T) {
collectors := getCollectors()
err := PerformAction(clients, config, deploymentFuncs, collectors, nil, invokeReloadStrategy)
testutil.TestSleep(100 * time.Millisecond)
time.Sleep(5 * time.Second)
if err != nil {
t.Errorf("Rolling upgrade failed for Deployment with pod annotations")
}
@@ -5045,7 +5042,6 @@ func testPausingDeployment(t *testing.T, reloadStrategy string, testName string,
}
logrus.Infof("Verifying deployment has been resumed after pause interval")
// This sleep tests the pause functionality - reduce time in fast mode but keep some delay for test stability
time.Sleep(11 * time.Second)
items = deploymentFuncs.ItemsFunc(clients, config.Namespace)
deploymentPaused, err = isDeploymentPaused(items, testName)


@@ -45,7 +45,7 @@ func TestHealthz(t *testing.T) {
want := 200
if got != want {
t.Fatalf("got: %q, want: %q", got, want)
t.Fatalf("got: %d, want: %d", got, want)
}
// Have the liveness probe serve a 500
@@ -63,7 +63,7 @@ func TestHealthz(t *testing.T) {
want = 500
if got != want {
t.Fatalf("got: %q, want: %q", got, want)
t.Fatalf("got: %d, want: %d", got, want)
}
}
@@ -89,7 +89,7 @@ func TestRunLeaderElection(t *testing.T) {
want := 500
if got != want {
t.Fatalf("got: %q, want: %q", got, want)
t.Fatalf("got: %d, want: %d", got, want)
}
// Cancel the leader election context, so leadership is released and
@@ -108,7 +108,7 @@ func TestRunLeaderElection(t *testing.T) {
want = 500
if got != want {
t.Fatalf("got: %q, want: %q", got, want)
t.Fatalf("got: %d, want: %d", got, want)
}
}


@@ -1,54 +1,390 @@
package metrics
import (
"context"
"net/http"
"net/url"
"os"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"k8s.io/client-go/tools/metrics"
)
// clientGoRequestMetrics implements metrics.LatencyMetric and metrics.ResultMetric
// to expose client-go's rest_client_requests_total metric
type clientGoRequestMetrics struct {
requestCounter *prometheus.CounterVec
requestLatency *prometheus.HistogramVec
}
func (m *clientGoRequestMetrics) Increment(ctx context.Context, code string, method string, host string) {
m.requestCounter.WithLabelValues(code, method, host).Inc()
}
func (m *clientGoRequestMetrics) Observe(ctx context.Context, verb string, u url.URL, latency time.Duration) {
m.requestLatency.WithLabelValues(verb, u.Host).Observe(latency.Seconds())
}
var clientGoMetrics = &clientGoRequestMetrics{
requestCounter: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "rest_client_requests_total",
Help: "Number of HTTP requests, partitioned by status code, method, and host.",
},
[]string{"code", "method", "host"},
),
requestLatency: prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and host.",
Buckets: []float64{0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30},
},
[]string{"verb", "host"},
),
}
func init() {
// Register the metrics collectors
prometheus.MustRegister(clientGoMetrics.requestCounter)
prometheus.MustRegister(clientGoMetrics.requestLatency)
// Register our metrics implementation with client-go
metrics.RequestResult = clientGoMetrics
metrics.RequestLatency = clientGoMetrics
}
// Collectors holds all Prometheus metrics collectors for Reloader.
type Collectors struct {
Reloaded *prometheus.CounterVec
ReloadedByNamespace *prometheus.CounterVec
countByNamespace bool
ReconcileTotal *prometheus.CounterVec // Total reconcile calls by result
ReconcileDuration *prometheus.HistogramVec // Time spent in reconcile/handler
ActionTotal *prometheus.CounterVec // Total actions by workload kind and result
ActionLatency *prometheus.HistogramVec // Time from event to action applied
SkippedTotal *prometheus.CounterVec // Skipped operations by reason
QueueDepth prometheus.Gauge // Current queue depth
QueueAdds prometheus.Counter // Total items added to queue
QueueLatency *prometheus.HistogramVec // Time spent in queue
ErrorsTotal *prometheus.CounterVec // Errors by type
RetriesTotal prometheus.Counter // Total retries
EventsReceived *prometheus.CounterVec // Events received by type (add/update/delete)
EventsProcessed *prometheus.CounterVec // Events processed by type and result
WorkloadsScanned *prometheus.CounterVec // Workloads scanned by kind
WorkloadsMatched *prometheus.CounterVec // Workloads matched for reload by kind
}
// RecordReload records a reload event with the given success status and namespace.
// Preserved for backward compatibility.
func (c *Collectors) RecordReload(success bool, namespace string) {
if c == nil {
return
}
successLabel := "false"
if success {
successLabel = "true"
}
c.Reloaded.With(prometheus.Labels{"success": successLabel}).Inc()
if c.countByNamespace {
c.ReloadedByNamespace.With(prometheus.Labels{
"success": successLabel,
"namespace": namespace,
}).Inc()
}
}
// RecordReconcile records a reconcile/handler invocation.
func (c *Collectors) RecordReconcile(result string, duration time.Duration) {
if c == nil {
return
}
c.ReconcileTotal.With(prometheus.Labels{"result": result}).Inc()
c.ReconcileDuration.With(prometheus.Labels{"result": result}).Observe(duration.Seconds())
}
// RecordAction records a reload action on a workload.
func (c *Collectors) RecordAction(workloadKind string, result string, latency time.Duration) {
if c == nil {
return
}
c.ActionTotal.With(prometheus.Labels{"workload_kind": workloadKind, "result": result}).Inc()
c.ActionLatency.With(prometheus.Labels{"workload_kind": workloadKind}).Observe(latency.Seconds())
}
// RecordSkipped records a skipped operation with reason.
func (c *Collectors) RecordSkipped(reason string) {
if c == nil {
return
}
c.SkippedTotal.With(prometheus.Labels{"reason": reason}).Inc()
}
// RecordQueueAdd records an item being added to the queue.
func (c *Collectors) RecordQueueAdd() {
if c == nil {
return
}
c.QueueAdds.Inc()
}
// SetQueueDepth sets the current queue depth.
func (c *Collectors) SetQueueDepth(depth int) {
if c == nil {
return
}
c.QueueDepth.Set(float64(depth))
}
// RecordQueueLatency records how long an item spent in the queue.
func (c *Collectors) RecordQueueLatency(latency time.Duration) {
if c == nil {
return
}
c.QueueLatency.With(prometheus.Labels{}).Observe(latency.Seconds())
}
// RecordError records an error by type.
func (c *Collectors) RecordError(errorType string) {
if c == nil {
return
}
c.ErrorsTotal.With(prometheus.Labels{"type": errorType}).Inc()
}
// RecordRetry records a retry attempt.
func (c *Collectors) RecordRetry() {
if c == nil {
return
}
c.RetriesTotal.Inc()
}
// RecordEventReceived records an event being received.
func (c *Collectors) RecordEventReceived(eventType string, resourceType string) {
if c == nil {
return
}
c.EventsReceived.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType}).Inc()
}
// RecordEventProcessed records an event being processed.
func (c *Collectors) RecordEventProcessed(eventType string, resourceType string, result string) {
if c == nil {
return
}
c.EventsProcessed.With(prometheus.Labels{"event_type": eventType, "resource_type": resourceType, "result": result}).Inc()
}
// RecordWorkloadsScanned records workloads scanned during a reconcile.
func (c *Collectors) RecordWorkloadsScanned(kind string, count int) {
if c == nil {
return
}
c.WorkloadsScanned.With(prometheus.Labels{"kind": kind}).Add(float64(count))
}
// RecordWorkloadsMatched records workloads matched for reload.
func (c *Collectors) RecordWorkloadsMatched(kind string, count int) {
if c == nil {
return
}
c.WorkloadsMatched.With(prometheus.Labels{"kind": kind}).Add(float64(count))
}
func NewCollectors() Collectors {
// Existing metrics (preserved)
reloaded := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "reload_executed_total",
Help: "Counter of reloads executed by Reloader.",
},
[]string{
"success",
},
[]string{"success"},
)
//set 0 as default value
reloaded.With(prometheus.Labels{"success": "true"}).Add(0)
reloaded.With(prometheus.Labels{"success": "false"}).Add(0)
reloaded_by_namespace := prometheus.NewCounterVec(
reloadedByNamespace := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "reload_executed_total_by_namespace",
Help: "Counter of reloads executed by Reloader by namespace.",
},
[]string{
"success",
"namespace",
[]string{"success", "namespace"},
)
reconcileTotal := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "reconcile_total",
Help: "Total number of reconcile/handler invocations by result.",
},
[]string{"result"},
)
reconcileDuration := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "reloader",
Name: "reconcile_duration_seconds",
Help: "Time spent in reconcile/handler in seconds.",
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
},
[]string{"result"},
)
actionTotal := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "action_total",
Help: "Total number of reload actions by workload kind and result.",
},
[]string{"workload_kind", "result"},
)
actionLatency := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "reloader",
Name: "action_latency_seconds",
Help: "Time from event received to action applied in seconds.",
Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60},
},
[]string{"workload_kind"},
)
skippedTotal := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "skipped_total",
Help: "Total number of skipped operations by reason.",
},
[]string{"reason"},
)
queueDepth := prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "reloader",
Name: "workqueue_depth",
Help: "Current depth of the work queue.",
},
)
queueAdds := prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "workqueue_adds_total",
Help: "Total number of items added to the work queue.",
},
)
queueLatency := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "reloader",
Name: "workqueue_latency_seconds",
Help: "Time spent in the work queue in seconds.",
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
},
[]string{},
)
errorsTotal := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "errors_total",
Help: "Total number of errors by type.",
},
[]string{"type"},
)
retriesTotal := prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "retries_total",
Help: "Total number of retry attempts.",
},
)
eventsReceived := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "events_received_total",
Help: "Total number of events received by type and resource.",
},
[]string{"event_type", "resource_type"},
)
eventsProcessed := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "events_processed_total",
Help: "Total number of events processed by type, resource, and result.",
},
[]string{"event_type", "resource_type", "result"},
)
workloadsScanned := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "workloads_scanned_total",
Help: "Total number of workloads scanned by kind.",
},
[]string{"kind"},
)
workloadsMatched := prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "reloader",
Name: "workloads_matched_total",
Help: "Total number of workloads matched for reload by kind.",
},
[]string{"kind"},
)
return Collectors{
Reloaded: reloaded,
ReloadedByNamespace: reloaded_by_namespace,
ReloadedByNamespace: reloadedByNamespace,
countByNamespace: os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled",
ReconcileTotal: reconcileTotal,
ReconcileDuration: reconcileDuration,
ActionTotal: actionTotal,
ActionLatency: actionLatency,
SkippedTotal: skippedTotal,
QueueDepth: queueDepth,
QueueAdds: queueAdds,
QueueLatency: queueLatency,
ErrorsTotal: errorsTotal,
RetriesTotal: retriesTotal,
EventsReceived: eventsReceived,
EventsProcessed: eventsProcessed,
WorkloadsScanned: workloadsScanned,
WorkloadsMatched: workloadsMatched,
}
}
func SetupPrometheusEndpoint() Collectors {
collectors := NewCollectors()
prometheus.MustRegister(collectors.Reloaded)
prometheus.MustRegister(collectors.ReconcileTotal)
prometheus.MustRegister(collectors.ReconcileDuration)
prometheus.MustRegister(collectors.ActionTotal)
prometheus.MustRegister(collectors.ActionLatency)
prometheus.MustRegister(collectors.SkippedTotal)
prometheus.MustRegister(collectors.QueueDepth)
prometheus.MustRegister(collectors.QueueAdds)
prometheus.MustRegister(collectors.QueueLatency)
prometheus.MustRegister(collectors.ErrorsTotal)
prometheus.MustRegister(collectors.RetriesTotal)
prometheus.MustRegister(collectors.EventsReceived)
prometheus.MustRegister(collectors.EventsProcessed)
prometheus.MustRegister(collectors.WorkloadsScanned)
prometheus.MustRegister(collectors.WorkloadsMatched)
if os.Getenv("METRICS_COUNT_BY_NAMESPACE") == "enabled" {
prometheus.MustRegister(collectors.ReloadedByNamespace)


@@ -5,7 +5,6 @@ import (
"encoding/json"
"fmt"
"math/rand"
"os"
"sort"
"strconv"
"strings"
@@ -44,29 +43,8 @@ var (
SecretResourceType = "secrets"
// SecretproviderclasspodstatusResourceType is a resource type which controller watches for changes
SecretProviderClassPodStatusResourceType = "secretproviderclasspodstatuses"
// SkipTestSleeps can be set to true to skip all sleep delays in test utilities
// This significantly speeds up tests when using fake clients (which are synchronous)
// Set via environment variable RELOADER_SKIP_TEST_SLEEPS=true or in test code
SkipTestSleeps = false
)
func init() {
// Check environment variable to enable fast test mode
if os.Getenv("RELOADER_SKIP_TEST_SLEEPS") == "true" {
SkipTestSleeps = true
}
}
// TestSleep is a helper that only sleeps if SkipTestSleeps is false
// This allows tests to run much faster with fake clients while maintaining
// compatibility with real cluster testing if needed
func TestSleep(duration time.Duration) {
if !SkipTestSleeps {
time.Sleep(duration)
}
}
var (
Clients = kube.GetClients()
Pod = "test-reloader-" + RandSeq(5)
@@ -861,7 +839,7 @@ func CreateConfigMap(client kubernetes.Interface, namespace string, configmapNam
logrus.Infof("Creating configmap")
configmapClient := client.CoreV1().ConfigMaps(namespace)
_, err := configmapClient.Create(context.TODO(), GetConfigmap(namespace, configmapName, data), metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return configmapClient, err
}
@@ -870,7 +848,7 @@ func CreateSecretProviderClass(client csiclient.Interface, namespace string, sec
logrus.Infof("Creating SecretProviderClass")
secretProviderClassClient := client.SecretsstoreV1().SecretProviderClasses(namespace)
_, err := secretProviderClassClient.Create(context.TODO(), GetSecretProviderClass(namespace, secretProviderClassName, data), metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return secretProviderClassClient, err
}
@@ -880,7 +858,7 @@ func CreateSecretProviderClassPodStatus(client csiclient.Interface, namespace st
secretProviderClassPodStatusClient := client.SecretsstoreV1().SecretProviderClassPodStatuses(namespace)
secretProviderClassPodStatus := GetSecretProviderClassPodStatus(namespace, secretProviderClassPodStatusName, data)
_, err := secretProviderClassPodStatusClient.Create(context.TODO(), secretProviderClassPodStatus, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return secretProviderClassPodStatusClient, err
}
@@ -889,7 +867,7 @@ func CreateSecret(client kubernetes.Interface, namespace string, secretName stri
logrus.Infof("Creating secret")
secretClient := client.CoreV1().Secrets(namespace)
_, err := secretClient.Create(context.TODO(), GetSecret(namespace, secretName, data), metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return secretClient, err
}
@@ -904,7 +882,7 @@ func CreateDeployment(client kubernetes.Interface, deploymentName string, namesp
deploymentObj = GetDeploymentWithEnvVars(namespace, deploymentName)
}
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -924,7 +902,7 @@ func CreateDeploymentWithAnnotations(client kubernetes.Interface, deploymentName
}
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -939,7 +917,7 @@ func CreateDeploymentConfig(client appsclient.Interface, deploymentName string,
deploymentConfigObj = GetDeploymentConfigWithEnvVars(namespace, deploymentName)
}
deploymentConfig, err := deploymentConfigsClient.Create(context.TODO(), deploymentConfigObj, metav1.CreateOptions{})
TestSleep(5 * time.Second)
time.Sleep(5 * time.Second)
return deploymentConfig, err
}
@@ -954,7 +932,7 @@ func CreateDeploymentWithInitContainer(client kubernetes.Interface, deploymentNa
deploymentObj = GetDeploymentWithInitContainerAndEnv(namespace, deploymentName)
}
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -964,7 +942,7 @@ func CreateDeploymentWithEnvVarSource(client kubernetes.Interface, deploymentNam
deploymentClient := client.AppsV1().Deployments(namespace)
deploymentObj := GetDeploymentWithEnvVarSources(namespace, deploymentName)
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -975,7 +953,7 @@ func CreateDeploymentWithPodAnnotations(client kubernetes.Interface, deploymentN
deploymentClient := client.AppsV1().Deployments(namespace)
deploymentObj := GetDeploymentWithPodAnnotations(namespace, deploymentName, both)
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -987,7 +965,7 @@ func CreateDeploymentWithEnvVarSourceAndAnnotations(client kubernetes.Interface,
deploymentObj := GetDeploymentWithEnvVarSources(namespace, deploymentName)
deploymentObj.Annotations = annotations
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -997,7 +975,7 @@ func CreateDeploymentWithTypedAutoAnnotation(client kubernetes.Interface, deploy
deploymentClient := client.AppsV1().Deployments(namespace)
deploymentObj := GetDeploymentWithTypedAutoAnnotation(namespace, deploymentName, resourceType)
deployment, err := deploymentClient.Create(context.TODO(), deploymentObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deployment, err
}
@@ -1021,7 +999,7 @@ func CreateDaemonSet(client kubernetes.Interface, daemonsetName string, namespac
daemonsetObj = GetDaemonSetWithEnvVars(namespace, daemonsetName)
}
daemonset, err := daemonsetClient.Create(context.TODO(), daemonsetObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return daemonset, err
}
@@ -1036,7 +1014,7 @@ func CreateStatefulSet(client kubernetes.Interface, statefulsetName string, name
statefulsetObj = GetStatefulSetWithEnvVar(namespace, statefulsetName)
}
statefulset, err := statefulsetClient.Create(context.TODO(), statefulsetObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return statefulset, err
}
@@ -1051,7 +1029,7 @@ func CreateCronJob(client kubernetes.Interface, cronJobName string, namespace st
cronJobObj = GetCronJobWithEnvVar(namespace, cronJobName)
}
cronJob, err := cronJobClient.Create(context.TODO(), cronJobObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return cronJob, err
}
@@ -1066,7 +1044,7 @@ func CreateJob(client kubernetes.Interface, jobName string, namespace string, vo
jobObj = GetJobWithEnvVar(namespace, jobName)
}
job, err := jobClient.Create(context.TODO(), jobObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return job, err
}
@@ -1074,7 +1052,7 @@ func CreateJob(client kubernetes.Interface, jobName string, namespace string, vo
func DeleteDeployment(client kubernetes.Interface, namespace string, deploymentName string) error {
logrus.Infof("Deleting Deployment")
deploymentError := client.AppsV1().Deployments(namespace).Delete(context.TODO(), deploymentName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deploymentError
}
@@ -1082,7 +1060,7 @@ func DeleteDeployment(client kubernetes.Interface, namespace string, deploymentN
func DeleteDeploymentConfig(client appsclient.Interface, namespace string, deploymentConfigName string) error {
logrus.Infof("Deleting DeploymentConfig")
deploymentConfigError := client.AppsV1().DeploymentConfigs(namespace).Delete(context.TODO(), deploymentConfigName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return deploymentConfigError
}
@@ -1090,7 +1068,7 @@ func DeleteDeploymentConfig(client appsclient.Interface, namespace string, deplo
func DeleteDaemonSet(client kubernetes.Interface, namespace string, daemonsetName string) error {
logrus.Infof("Deleting DaemonSet %s", daemonsetName)
daemonsetError := client.AppsV1().DaemonSets(namespace).Delete(context.TODO(), daemonsetName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return daemonsetError
}
@@ -1098,7 +1076,7 @@ func DeleteDaemonSet(client kubernetes.Interface, namespace string, daemonsetNam
func DeleteStatefulSet(client kubernetes.Interface, namespace string, statefulsetName string) error {
logrus.Infof("Deleting StatefulSet %s", statefulsetName)
statefulsetError := client.AppsV1().StatefulSets(namespace).Delete(context.TODO(), statefulsetName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return statefulsetError
}
@@ -1106,7 +1084,7 @@ func DeleteStatefulSet(client kubernetes.Interface, namespace string, statefulse
func DeleteCronJob(client kubernetes.Interface, namespace string, cronJobName string) error {
logrus.Infof("Deleting CronJob %s", cronJobName)
cronJobError := client.BatchV1().CronJobs(namespace).Delete(context.TODO(), cronJobName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return cronJobError
}
@@ -1114,7 +1092,7 @@ func DeleteCronJob(client kubernetes.Interface, namespace string, cronJobName st
func DeleteJob(client kubernetes.Interface, namespace string, jobName string) error {
logrus.Infof("Deleting Job %s", jobName)
jobError := client.BatchV1().Jobs(namespace).Delete(context.TODO(), jobName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return jobError
}
@@ -1128,7 +1106,7 @@ func UpdateConfigMap(configmapClient core_v1.ConfigMapInterface, namespace strin
configmap = GetConfigmap(namespace, configmapName, data)
}
_, updateErr := configmapClient.Update(context.TODO(), configmap, metav1.UpdateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return updateErr
}
@@ -1142,7 +1120,7 @@ func UpdateSecret(secretClient core_v1.SecretInterface, namespace string, secret
secret = GetSecret(namespace, secretName, data)
}
_, updateErr := secretClient.Update(context.TODO(), secret, metav1.UpdateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return updateErr
}
@@ -1163,7 +1141,7 @@ func UpdateSecretProviderClassPodStatus(spcpsClient csiclient_v1.SecretProviderC
labels["firstLabel"] = label
}
_, updateErr := spcpsClient.Update(context.TODO(), secretproviderclasspodstatus, metav1.UpdateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return updateErr
}
@@ -1171,7 +1149,7 @@ func UpdateSecretProviderClassPodStatus(spcpsClient csiclient_v1.SecretProviderC
func DeleteConfigMap(client kubernetes.Interface, namespace string, configmapName string) error {
logrus.Infof("Deleting configmap %q.\n", configmapName)
err := client.CoreV1().ConfigMaps(namespace).Delete(context.TODO(), configmapName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return err
}
@@ -1179,7 +1157,7 @@ func DeleteConfigMap(client kubernetes.Interface, namespace string, configmapNam
func DeleteSecret(client kubernetes.Interface, namespace string, secretName string) error {
logrus.Infof("Deleting secret %q.\n", secretName)
err := client.CoreV1().Secrets(namespace).Delete(context.TODO(), secretName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return err
}
@@ -1187,7 +1165,7 @@ func DeleteSecret(client kubernetes.Interface, namespace string, secretName stri
func DeleteSecretProviderClass(client csiclient.Interface, namespace string, secretProviderClassName string) error {
logrus.Infof("Deleting secretproviderclass %q.\n", secretProviderClassName)
err := client.SecretsstoreV1().SecretProviderClasses(namespace).Delete(context.TODO(), secretProviderClassName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return err
}
@@ -1195,7 +1173,7 @@ func DeleteSecretProviderClass(client csiclient.Interface, namespace string, sec
func DeleteSecretProviderClassPodStatus(client csiclient.Interface, namespace string, secretProviderClassPodStatusName string) error {
logrus.Infof("Deleting secretproviderclasspodstatus %q.\n", secretProviderClassPodStatusName)
err := client.SecretsstoreV1().SecretProviderClassPodStatuses(namespace).Delete(context.TODO(), secretProviderClassPodStatusName, metav1.DeleteOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return err
}
@@ -1377,6 +1355,6 @@ func CreateRollout(client argorollout.Interface, rolloutName string, namespace s
rolloutClient := client.ArgoprojV1alpha1().Rollouts(namespace)
rolloutObj := GetRollout(namespace, rolloutName, annotations)
rollout, err := rolloutClient.Create(context.TODO(), rolloutObj, metav1.CreateOptions{})
TestSleep(3 * time.Second)
time.Sleep(3 * time.Second)
return rollout, err
}


@@ -18,11 +18,12 @@ import (
var Version = "dev"
var Commit = "unknown"
var BuildDate = "unknown"
var Edition = "oss"
const (
MetaInfoConfigmapName = "reloader-meta-info"
MetaInfoConfigmapLabelKey = "reloader.stakater.com/meta-info"
MetaInfoConfigmapLabelValue = "reloader-oss"
MetaInfoConfigmapLabelValue = "reloader"
)
// MetaInfo contains comprehensive metadata about the Reloader instance.
@@ -47,6 +48,9 @@ type BuildInfo struct {
CommitHash string `json:"commitHash"`
// CommitTime is the timestamp of the Git commit used to build this binary
CommitTime time.Time `json:"commitTime"`
// Edition indicates the edition of Reloader (e.g., OSS, Enterprise)
Edition string `json:"edition"`
}
func NewBuildInfo() *BuildInfo {
@@ -55,6 +59,7 @@ func NewBuildInfo() *BuildInfo {
ReleaseVersion: Version,
CommitHash: Commit,
CommitTime: ParseUTCTime(BuildDate),
Edition: Edition,
}
return metaInfo


@@ -8,8 +8,8 @@ import (
// ResourceMap are resources from where changes are going to be detected
var ResourceMap = map[string]runtime.Object{
"configmaps": &v1.ConfigMap{},
"secrets": &v1.Secret{},
"namespaces": &v1.Namespace{},
"configmaps": &v1.ConfigMap{},
"secrets": &v1.Secret{},
"namespaces": &v1.Namespace{},
"secretproviderclasspodstatuses": &csiv1.SecretProviderClassPodStatus{},
}

test/loadtest/README.md (new file, +544 lines)

@@ -0,0 +1,544 @@
# Reloader Load Test Framework
This framework provides A/B comparison testing between two Reloader container images.
## Overview
The load test framework performs the following steps (a rough manual equivalent is sketched after this list):
1. Creates a local kind cluster (1 control-plane + 6 worker nodes)
2. Deploys Prometheus for metrics collection
3. Loads the provided Reloader container images into the cluster
4. Runs standardized test scenarios (S1-S13)
5. Collects metrics via Prometheus scraping
6. Generates comparison reports with pass/fail criteria
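For orientation, the first three steps are roughly what you would do by hand with kind and kubectl. The sketch below is illustrative only; the cluster config file, Prometheus manifest, and image tag are assumptions, not the tool's actual artifacts.
```bash
# Rough manual equivalent of steps 1-3 (illustrative; kind-config.yaml and
# prometheus.yaml are hypothetical stand-ins for the tool's own assets):
kind create cluster --name reloader-loadtest --config kind-config.yaml    # 1 control-plane + 6 workers
kubectl apply -f prometheus.yaml                                          # Prometheus for metrics collection
kind load docker-image stakater/reloader:v1.1.0 --name reloader-loadtest  # load the image under test
# Steps 4-6 (scenarios, metrics scraping, reporting) are driven by the loadtest binary itself.
```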
## Prerequisites
- Docker or Podman
- kind (Kubernetes in Docker)
- kubectl
- Go 1.22+
## Building
```bash
cd test/loadtest
go build -o loadtest ./cmd/loadtest
```
## Quick Start
```bash
# Compare two published images (e.g., different versions)
./loadtest run \
--old-image=stakater/reloader:v1.0.0 \
--new-image=stakater/reloader:v1.1.0
# Run a specific scenario
./loadtest run \
--old-image=stakater/reloader:v1.0.0 \
--new-image=stakater/reloader:v1.1.0 \
--scenario=S2 \
--duration=120
# Test only a single image (no comparison)
./loadtest run --new-image=myregistry/reloader:dev
# Use local images built with docker/podman
./loadtest run \
--old-image=localhost/reloader:baseline \
--new-image=localhost/reloader:feature-branch
# Skip cluster creation (use existing kind cluster)
./loadtest run \
--old-image=stakater/reloader:v1.0.0 \
--new-image=stakater/reloader:v1.1.0 \
--skip-cluster
# Run all scenarios in parallel on 4 clusters (faster execution)
./loadtest run \
--new-image=localhost/reloader:dev \
--parallelism=4
# Run all 13 scenarios in parallel (one cluster per scenario)
./loadtest run \
--new-image=localhost/reloader:dev \
--parallelism=13
# Generate report from existing results
./loadtest report --scenario=S2 --results-dir=./results
```
## Command Line Options
### Run Command
| Option | Description | Default |
|--------|-------------|---------|
| `--old-image=IMAGE` | Container image for "old" version | - |
| `--new-image=IMAGE` | Container image for "new" version | - |
| `--scenario=ID` | Test scenario: S1-S13 or "all" | all |
| `--duration=SECONDS` | Test duration in seconds | 60 |
| `--parallelism=N` | Run N scenarios in parallel on N kind clusters | 1 |
| `--skip-cluster` | Skip kind cluster creation (use existing, only for parallelism=1) | false |
| `--results-dir=DIR` | Directory for results | ./results |
**Note:** At least one of `--old-image` or `--new-image` is required. Provide both for A/B comparison.
### Report Command
| Option | Description | Default |
|--------|-------------|---------|
| `--scenario=ID` | Scenario to report on (required) | - |
| `--results-dir=DIR` | Directory containing results | ./results |
| `--output=FILE` | Output file (default: stdout) | - |
## Test Scenarios
| ID | Name | Description |
|-----|-----------------------|-------------------------------------------------|
| S1 | Burst Updates | Many ConfigMap/Secret updates in quick succession |
| S2 | Fan-Out | One ConfigMap used by many (50) workloads |
| S3 | High Cardinality | Many CMs/Secrets across many namespaces |
| S4 | No-Op Updates | Updates that don't change data (annotation only)|
| S5 | Workload Churn | Deployments created/deleted rapidly |
| S6 | Controller Restart | Restart controller pod under load |
| S7 | API Pressure | Many concurrent update requests |
| S8 | Large Objects | ConfigMaps > 100KB |
| S9 | Multi-Workload Types | Tests all workload types (Deploy, STS, DS) |
| S10 | Secrets + Mixed | Secrets and mixed ConfigMap+Secret workloads |
| S11 | Annotation Strategy | Tests `--reload-strategy=annotations` |
| S12 | Pause & Resume | Tests pause-period during rapid updates |
| S13 | Complex References | Init containers, valueFrom, projected volumes |
## Metrics Reference
This section explains each metric collected during load tests, what it measures, and what different values might indicate.
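All of these values come from the Prometheus instance the framework deploys (exposed on `localhost:9091`, as referenced in the Troubleshooting section). As a rough, illustrative sketch of how a single counter can be inspected by hand, the snippet below queries the standard Prometheus HTTP API; the 5-minute window and the output handling are placeholders, not part of the framework:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// promResponse mirrors the standard Prometheus /api/v1/query response shape.
type promResponse struct {
	Status string `json:"status"`
	Data   struct {
		Result []struct {
			Metric map[string]string `json:"metric"`
			Value  []interface{}     `json:"value"` // [unixTimestamp, "value"]
		} `json:"result"`
	} `json:"data"`
}

func main() {
	// increase() over the test window tolerates counter resets caused by pod restarts.
	query := `increase(reloader_reload_executed_total{success="true"}[5m])`
	resp, err := http.Get("http://localhost:9091/api/v1/query?query=" + url.QueryEscape(query))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var pr promResponse
	if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil {
		panic(err)
	}
	for _, r := range pr.Data.Result {
		fmt.Printf("%v => %v\n", r.Metric, r.Value[1])
	}
}
```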
### Counter Metrics (Totals)
#### `reconcile_total`
**What it measures:** The total number of reconciliation loops executed by the controller.
**What it indicates:**
- **Higher in new vs old:** The new controller-runtime implementation may batch events differently. This is often expected behavior, not a problem.
- **Lower in new vs old:** Better event batching/deduplication. Controller-runtime's work queue naturally deduplicates events.
- **Expected behavior:** The new implementation typically has *fewer* reconciles due to intelligent event batching.
#### `action_total`
**What it measures:** The total number of reload actions triggered (rolling restarts of Deployments/StatefulSets/DaemonSets).
**What it indicates:**
- **Should match expected value:** Both implementations should trigger the same number of reloads for the same workload.
- **Lower than expected:** Some updates were missed - potential bug or race condition.
- **Higher than expected:** Duplicate reloads triggered - inefficiency but not data loss.
#### `reload_executed_total`
**What it measures:** Successful reload operations executed, labeled by `success=true/false`.
**What it indicates:**
- **`success=true` count:** Number of workloads successfully restarted.
- **`success=false` count:** Failed restart attempts (API errors, permission issues).
- **Should match `action_total`:** If significantly lower, reloads are failing.
#### `workloads_scanned_total`
**What it measures:** Number of workloads (Deployments, etc.) scanned when checking for ConfigMap/Secret references.
**What it indicates:**
- **High count:** Controller is scanning many workloads per reconcile.
- **Expected behavior:** Should roughly match the number of workloads × number of reconciles.
- **Optimization signal:** If very high, namespace filtering or label selectors could help.
#### `workloads_matched_total`
**What it measures:** Number of workloads that matched (reference the changed ConfigMap/Secret).
**What it indicates:**
- **Should match `reload_executed_total`:** Every matched workload should be reloaded.
- **Higher than reloads:** Some matched workloads weren't reloaded (potential issue).
#### `errors_total`
**What it measures:** Total errors encountered, labeled by error type.
**What it indicates:**
- **Should be 0:** Any errors indicate problems.
- **Common causes:** API server timeouts, RBAC issues, resource conflicts.
- **Critical metric:** Non-zero errors in production should be investigated.
### API Efficiency Metrics (REST Client)
These metrics track Kubernetes API server calls made by Reloader. Lower values indicate more efficient operation with less API server load.
#### `rest_client_requests_total`
**What it measures:** Total number of HTTP requests made to the Kubernetes API server.
**What it indicates:**
- **Lower is better:** Fewer API calls means less load on the API server.
- **High count:** May indicate inefficient caching or excessive reconciles.
- **Comparison use:** Shows overall API efficiency between implementations.
#### `rest_client_requests_get`
**What it measures:** Number of GET requests (fetching individual resources or listings).
**What it indicates:**
- **Includes:** Fetching ConfigMaps, Secrets, Deployments, etc.
- **Higher count:** More frequent resource fetching, possibly due to cache misses.
- **Expected behavior:** Controller-runtime's caching should reduce GET requests compared to direct API calls.
#### `rest_client_requests_patch`
**What it measures:** Number of PATCH requests (partial updates to resources).
**What it indicates:**
- **Used for:** Rolling restart annotations on workloads.
- **Should correlate with:** `reload_executed_total` - each reload typically requires one PATCH.
- **Lower is better:** Fewer patches means more efficient batching or deduplication.
#### `rest_client_requests_put`
**What it measures:** Number of PUT requests (full resource updates).
**What it indicates:**
- **Used for:** Full object replacements (less common than PATCH).
- **Should be low:** Most updates use PATCH for efficiency.
- **High count:** May indicate suboptimal update strategy.
#### `rest_client_requests_errors`
**What it measures:** Number of failed API requests (4xx/5xx responses).
**What it indicates:**
- **Should be 0:** Errors indicate API server issues or permission problems.
- **Common causes:** Rate limiting, RBAC issues, resource conflicts, network issues.
- **Non-zero:** Investigate API server logs and Reloader permissions.
### Latency Metrics (Percentiles)
All latency metrics are reported in **seconds**. The report shows p50 (median), p95, and p99 percentiles.
#### `reconcile_duration (s)`
**What it measures:** Time spent inside each reconcile loop, from start to finish.
**What it indicates:**
- **p50 (median):** Typical reconcile time. Should be < 100ms for good performance.
- **p95:** 95th percentile - only 5% of reconciles take longer than this.
- **p99:** 99th percentile - indicates worst-case performance.
**Interpreting differences:**
- **New higher than old:** Controller-runtime reconciles may do more work per loop but run fewer times. Check `reconcile_total` - if it's lower, this is expected.
- **Minor differences (< 0.5s absolute):** Not significant for sub-second values.
#### `action_latency (s)`
**What it measures:** End-to-end time from ConfigMap/Secret change detection to workload restart triggered.
**What it indicates:**
- **This is the user-facing latency:** How long users wait for their config changes to take effect.
- **p50 < 1s:** Excellent - most changes apply within a second.
- **p95 < 5s:** Good - even under load, changes apply quickly.
- **p99 > 10s:** May need investigation - some changes take too long.
**What affects this:**
- API server responsiveness
- Number of workloads to scan
- Concurrent updates competing for resources
### Understanding the Report
#### Report Columns
```
Metric                    Old      New      Expected  Old✓  New✓  Status
------                    ---      ---      --------  ----  ----  ------
action_total              100.00   100.00   100       ✓     ✓     pass
action_latency_p95 (s)    0.15     0.04     -         -     -     pass
```
- **Old/New:** Measured values from each implementation
- **Expected:** Known expected value (for throughput metrics)
- **Old✓/New✓:** Whether the value is within 15% of expected (✓ = yes, ✗ = no, - = no expected value)
- **Status:** pass/fail based on comparison thresholds
#### Pass/Fail Logic
| Metric Type | Pass Condition |
|-------------|----------------|
| Throughput (action_total, reload_executed_total) | New value within 15% of expected |
| Latency (p50, p95, p99) | New not more than threshold% worse than old, OR absolute difference < minimum threshold |
| Errors | New ≤ Old (ideally both 0) |
| API Efficiency (rest_client_requests_*) | New ≤ Old (lower is better), or New not more than 50% higher |
#### Latency Thresholds
Latency comparisons use both percentage AND absolute thresholds to avoid false failures:
| Metric | Max % Worse | Min Absolute Diff |
|--------|-------------|-------------------|
| p50 | 100% | 0.5s |
| p95 | 100% | 1.0s |
| p99 | 100% | 1.0s |
**Example:** If old p50 = 0.01s and new p50 = 0.08s:
- Percentage difference: +700% (would fail % check)
- Absolute difference: 0.07s (< 0.5s threshold)
- **Result: PASS** (both values are fast enough that the difference doesn't matter)
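Expressed as code, a minimal sketch of these two checks looks like the following; the function names are illustrative only, and the framework's actual thresholds and evaluation live in its report generator:

```go
package main

import (
	"fmt"
	"math"
)

// latencyPasses applies the dual-threshold rule: a higher new value fails only if it is
// both more than maxPct percent worse than the old value AND worse by more than
// minAbsDiff in absolute terms.
func latencyPasses(oldVal, newVal, maxPct, minAbsDiff float64) bool {
	if newVal <= oldVal {
		return true // equal or better is always a pass
	}
	absDiff := newVal - oldVal
	pctDiff := absDiff / oldVal * 100
	return pctDiff <= maxPct || absDiff < minAbsDiff
}

// throughputPasses applies the "within 15% of expected" rule used for counters
// such as action_total and reload_executed_total.
func throughputPasses(measured, expected float64) bool {
	if expected == 0 {
		return true // no expected value recorded for this scenario
	}
	return math.Abs(measured-expected) <= expected*0.15
}

func main() {
	// The worked example above: old p50 = 0.01s, new p50 = 0.08s, 100% / 0.5s thresholds.
	fmt.Println(latencyPasses(0.01, 0.08, 100.0, 0.5)) // true: +700% but only 0.07s slower
	fmt.Println(throughputPasses(50, 50))              // true: matches the expected count
}
```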
### Resource Consumption Metrics
These metrics track CPU, memory, and Go runtime resource usage. Lower values generally indicate more efficient operation.
#### Memory Metrics
| Metric | Description | Unit |
|--------|-------------|------|
| `memory_rss_mb_avg` | Average RSS (resident set size) memory | MB |
| `memory_rss_mb_max` | Peak RSS memory during test | MB |
| `memory_heap_mb_avg` | Average Go heap allocation | MB |
| `memory_heap_mb_max` | Peak Go heap allocation | MB |
**What to watch for:**
- **High RSS:** May indicate memory leaks or inefficient caching
- **High heap:** Many objects being created (check GC metrics)
- **Growing over time:** Potential memory leak
#### CPU Metrics
| Metric | Description | Unit |
|--------|-------------|------|
| `cpu_cores_avg` | Average CPU usage rate | cores |
| `cpu_cores_max` | Peak CPU usage rate | cores |
**What to watch for:**
- **High CPU:** Inefficient algorithms or excessive reconciles
- **Spiky max:** May indicate burst handling issues
#### Go Runtime Metrics
| Metric | Description | Unit |
|--------|-------------|------|
| `goroutines_avg` | Average goroutine count | count |
| `goroutines_max` | Peak goroutine count | count |
| `gc_pause_p99_ms` | 99th percentile GC pause time | ms |
**What to watch for:**
- **High goroutines:** Potential goroutine leak or unbounded concurrency
- **High GC pause:** Large heap or allocation pressure
### Scenario-Specific Expectations
| Scenario | Key Metrics to Watch | Expected Behavior |
|----------|---------------------|-------------------|
| S1 (Burst) | action_latency_p99, cpu_cores_max, goroutines_max | Should handle bursts without queue backup |
| S2 (Fan-Out) | reconcile_total, workloads_matched, memory_rss_mb_max | One CM change → 50 workload reloads |
| S3 (High Cardinality) | reconcile_duration, memory_heap_mb_avg | Many namespaces shouldn't increase memory |
| S4 (No-Op) | action_total = 0, cpu_cores_avg should be low | Minimal resource usage for no-op |
| S5 (Churn) | errors_total, goroutines_avg | Graceful handling, no goroutine leak |
| S6 (Restart) | All metrics captured | Metrics survive controller restart |
| S7 (API Pressure) | errors_total, cpu_cores_max, goroutines_max | No errors under concurrent load |
| S8 (Large Objects) | memory_rss_mb_max, gc_pause_p99_ms | Large ConfigMaps don't cause OOM or GC issues |
| S9 (Multi-Workload) | reload_executed_total per type | All workload types (Deploy, STS, DS) reload |
| S10 (Secrets) | reload_executed_total, workloads_matched | Both Secrets and ConfigMaps trigger reloads |
| S11 (Annotation) | workload annotations present | Deployments get `last-reloaded-from` annotation |
| S12 (Pause) | reload_executed_total << updates | Pause-period reduces reload frequency |
| S13 (Complex) | reload_executed_total | All reference types trigger reloads |
### Troubleshooting
#### New implementation shows 0 for all metrics
- Check if Prometheus is scraping the new Reloader pod
- Verify pod annotations: `prometheus.io/scrape: "true"`
- Check Prometheus targets: `http://localhost:9091/targets`
#### Metrics don't match expected values
- Verify test ran to completion (check logs)
- Ensure Prometheus scraped final metrics (18s wait after test)
- Check for pod restarts during test (metrics reset on restart - handled by `increase()`)
#### High latency in new implementation
- Check Reloader pod resource limits
- Look for API server throttling in logs
- Compare `reconcile_total` - fewer reconciles with higher duration may be normal
#### REST client errors are non-zero
- **Common causes:**
- Optional CRD schemes registered but CRDs not installed (e.g., Argo Rollouts, OpenShift DeploymentConfig)
- API server rate limiting under high load
- RBAC permissions missing for certain resource types
- **Argo Rollouts errors:** If you see ~4 errors per test and are not using Argo Rollouts, set `--enable-argo-rollouts=false`
- **OpenShift errors:** Similarly, ensure DeploymentConfig support is disabled on non-OpenShift clusters
#### REST client requests much higher in new implementation
- Check if caching is working correctly
- Look for excessive re-queuing in controller logs
- Compare `reconcile_total` - more reconciles naturally means more API calls
## Report Format
The report generator produces a comparison table with units and expected value indicators:
```
================================================================================
RELOADER A/B COMPARISON REPORT
================================================================================
Scenario: S2
Generated: 2026-01-03 14:30:00
Status: PASS
Summary: All metrics within acceptable thresholds
Test: S2: Fan-out test - 1 CM update triggers 50 deployment reloads
--------------------------------------------------------------------------------
METRIC COMPARISONS
--------------------------------------------------------------------------------
(Old✓/New✓ = meets expected value within 15%)
Metric                         Old      New      Expected  Old✓  New✓  Status
------                         ---      ---      --------  ----  ----  ------
reconcile_total                50.00    25.00    -         -     -     pass
reconcile_duration_p50 (s)     0.01     0.05     -         -     -     pass
reconcile_duration_p95 (s)     0.02     0.15     -         -     -     pass
action_total                   50.00    50.00    50        ✓     ✓     pass
action_latency_p50 (s)         0.05     0.03     -         -     -     pass
action_latency_p95 (s)         0.12     0.08     -         -     -     pass
errors_total                   0.00     0.00     -         -     -     pass
reload_executed_total          50.00    50.00    50        ✓     ✓     pass
workloads_scanned_total        50.00    50.00    50        ✓     ✓     pass
workloads_matched_total        50.00    50.00    50        ✓     ✓     pass
rest_client_requests_total     850      720      -         -     -     pass
rest_client_requests_get       500      420      -         -     -     pass
rest_client_requests_patch     300      250      -         -     -     pass
rest_client_requests_errors    0        0        -         -     -     pass
```
Reports are saved to `results/<scenario>/report.txt` after each test.
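Alongside the report, each scenario directory keeps the `expected.json` written by the test. A small illustrative sketch of reading it is shown below; the field names follow the framework's expected-metrics format, while the `S2` path and the printed message are just examples:

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// expectedMetrics mirrors a subset of the fields stored in expected.json.
type expectedMetrics struct {
	ActionTotal         int    `json:"action_total"`
	ReloadExecutedTotal int    `json:"reload_executed_total"`
	WorkloadsMatched    int    `json:"workloads_matched_total"`
	Description         string `json:"description"`
}

func main() {
	data, err := os.ReadFile(filepath.Join("results", "S2", "expected.json"))
	if err != nil {
		panic(err)
	}
	var exp expectedMetrics
	if err := json.Unmarshal(data, &exp); err != nil {
		panic(err)
	}
	fmt.Printf("%s: expecting %d reloads\n", exp.Description, exp.ReloadExecutedTotal)
}
```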
## Directory Structure
```
test/loadtest/
├── cmd/
│   └── loadtest/             # Unified CLI (run + report)
│       └── main.go
├── internal/
│   ├── cluster/              # Kind cluster management
│   │   └── kind.go
│   ├── prometheus/           # Prometheus deployment & querying
│   │   └── prometheus.go
│   ├── reloader/             # Reloader deployment
│   │   └── deploy.go
│   └── scenarios/            # Test scenario implementations
│       └── scenarios.go
├── manifests/
│   └── prometheus.yaml       # Prometheus deployment manifest
├── results/                  # Generated after tests
│   └── <scenario>/
│       ├── old/              # Old version data
│       │   ├── *.json        # Prometheus metric snapshots
│       │   └── reloader.log  # Reloader pod logs
│       ├── new/              # New version data
│       │   ├── *.json        # Prometheus metric snapshots
│       │   └── reloader.log  # Reloader pod logs
│       ├── expected.json     # Expected values from test
│       └── report.txt        # Comparison report
├── go.mod
├── go.sum
└── README.md
```
## Building Local Images for Testing
If you want to test local code changes:
```bash
# Build the new Reloader image from current source
docker build -t localhost/reloader:dev -f Dockerfile .
# Build from a different branch/commit
git checkout feature-branch
docker build -t localhost/reloader:feature -f Dockerfile .
# Then run comparison
./loadtest run \
--old-image=stakater/reloader:v1.0.0 \
--new-image=localhost/reloader:feature
```
## Interpreting Results
### PASS
All metrics are within acceptable thresholds. The new implementation is comparable or better than the old one.
### FAIL
One or more metrics exceeded thresholds. Review the specific metrics:
- **Latency degradation**: p95/p99 latencies are significantly higher
- **Missed reloads**: `reload_executed_total` differs significantly
- **Errors increased**: `errors_total` is higher in new version
### Investigation
If tests fail, check:
1. Pod logs: `kubectl logs -n reloader-new deployment/reloader` (or check `results/<scenario>/new/reloader.log`)
2. Resource usage: `kubectl top pods -n reloader-new`
3. Events: `kubectl get events -n reloader-test`
## Parallel Execution
The `--parallelism` option enables running scenarios on multiple kind clusters simultaneously, significantly reducing total test time.
### How It Works
1. **Multiple Clusters**: Creates N kind clusters named `reloader-loadtest-0`, `reloader-loadtest-1`, etc.
2. **Separate Prometheus**: Each cluster gets its own Prometheus instance with a unique port (9091, 9092, etc.)
3. **Worker Pool**: Scenarios are distributed to workers via a channel, with each worker running on its own cluster (see the sketch after this list)
4. **Independent Execution**: Each scenario runs in complete isolation with no resource contention
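A minimal sketch of that worker-pool pattern is shown below; it is purely illustrative (the real implementation lives in the CLI's run command), but the cluster naming matches the convention above:

```go
package main

import (
	"fmt"
	"sync"
)

// runScenario stands in for the real per-cluster test run.
func runScenario(cluster, scenario string) {
	fmt.Printf("[%s] running %s\n", cluster, scenario)
}

func main() {
	scenarios := []string{"S1", "S2", "S3", "S4"}
	parallelism := 2

	// Scenarios are fed through a channel; each worker owns one kind cluster.
	work := make(chan string)
	var wg sync.WaitGroup
	for i := 0; i < parallelism; i++ {
		cluster := fmt.Sprintf("reloader-loadtest-%d", i)
		wg.Add(1)
		go func() {
			defer wg.Done()
			for s := range work {
				runScenario(cluster, s)
			}
		}()
	}
	for _, s := range scenarios {
		work <- s
	}
	close(work)
	wg.Wait()
}
```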
### Usage
```bash
# Run 4 scenarios at a time (creates 4 clusters)
./loadtest run --new-image=my-image:tag --parallelism=4
# Run all 13 scenarios in parallel (creates 13 clusters)
./loadtest run --new-image=my-image:tag --parallelism=13 --scenario=all
```
### Resource Requirements
Parallel execution requires significant system resources:
| Parallelism | Clusters | Est. Memory | Est. CPU |
|-------------|----------|-------------|----------|
| 1 (default) | 1 | ~4GB | 2-4 cores |
| 4 | 4 | ~16GB | 8-16 cores |
| 13 | 13 | ~52GB | 26-52 cores |
### Notes
- The `--skip-cluster` option is not supported with parallelism > 1
- Each worker loads images independently, so initial setup takes longer
- All results are written to the same `--results-dir` with per-scenario subdirectories
- If a cluster setup fails, remaining workers continue with available clusters
- Parallelism automatically reduces to match scenario count if set higher
## CI Integration
### GitHub Actions
Load tests can be triggered on pull requests by commenting `/loadtest`:
```
/loadtest
```
This will:
1. Build a container image from the PR branch
2. Run all load test scenarios against it
3. Post results as a PR comment
4. Upload detailed results as artifacts
### Make Target
Run load tests locally or in CI:
```bash
# From repository root
make loadtest
```
This builds the container image and runs all scenarios with a 60-second duration.


@@ -0,0 +1,7 @@
package main
import "github.com/stakater/Reloader/test/loadtest/internal/cmd"
func main() {
cmd.Execute()
}

test/loadtest/go.mod Normal file

@@ -0,0 +1,52 @@
module github.com/stakater/Reloader/test/loadtest
go 1.26
require (
github.com/spf13/cobra v1.8.1
k8s.io/api v0.31.0
k8s.io/apimachinery v0.31.0
k8s.io/client-go v0.31.0
)
require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/term v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)

test/loadtest/go.sum Normal file

@@ -0,0 +1,160 @@
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM=
github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA=
github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To=
github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw=
github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo=
k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE=
k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc=
k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=


@@ -0,0 +1,314 @@
// Package cluster provides kind cluster management functionality.
package cluster
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"strings"
"time"
)
// Config holds configuration for kind cluster operations.
type Config struct {
Name string
ContainerRuntime string // "docker" or "podman"
PortOffset int // Offset for host port mappings (for parallel clusters)
}
// Manager handles kind cluster operations.
type Manager struct {
cfg Config
}
// NewManager creates a new cluster manager.
func NewManager(cfg Config) *Manager {
return &Manager{cfg: cfg}
}
// DetectContainerRuntime finds available container runtime.
// It checks if the runtime daemon is actually running, not just if the binary exists.
func DetectContainerRuntime() (string, error) {
if _, err := exec.LookPath("docker"); err == nil {
cmd := exec.Command("docker", "info")
if err := cmd.Run(); err == nil {
return "docker", nil
}
}
if _, err := exec.LookPath("podman"); err == nil {
cmd := exec.Command("podman", "info")
if err := cmd.Run(); err == nil {
return "podman", nil
}
}
return "", fmt.Errorf("neither docker nor podman is running")
}
// Exists checks if the cluster already exists.
func (m *Manager) Exists() bool {
cmd := exec.Command("kind", "get", "clusters")
out, err := cmd.Output()
if err != nil {
return false
}
for _, line := range strings.Split(string(out), "\n") {
if strings.TrimSpace(line) == m.cfg.Name {
return true
}
}
return false
}
// Delete deletes the kind cluster.
func (m *Manager) Delete(ctx context.Context) error {
cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", m.cfg.Name)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
// Create creates a new kind cluster with optimized settings.
func (m *Manager) Create(ctx context.Context) error {
if m.cfg.ContainerRuntime == "podman" {
os.Setenv("KIND_EXPERIMENTAL_PROVIDER", "podman")
}
if m.Exists() {
fmt.Printf("Cluster %s already exists, deleting...\n", m.cfg.Name)
if err := m.Delete(ctx); err != nil {
return fmt.Errorf("deleting existing cluster: %w", err)
}
}
httpPort := 8080 + m.cfg.PortOffset
httpsPort := 8443 + m.cfg.PortOffset
config := fmt.Sprintf(`kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/16"
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: InitConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        node-labels: "ingress-ready=true"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
  - |
    kind: ClusterConfiguration
    apiServer:
      extraArgs:
        max-requests-inflight: "800"
        max-mutating-requests-inflight: "400"
        watch-cache-sizes: "configmaps#1000,secrets#1000,pods#1000"
    controllerManager:
      extraArgs:
        kube-api-qps: "200"
        kube-api-burst: "200"
    scheduler:
      extraArgs:
        kube-api-qps: "200"
        kube-api-burst: "200"
  extraPortMappings:
  - containerPort: 80
    hostPort: %d
    protocol: TCP
  - containerPort: 443
    hostPort: %d
    protocol: TCP
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
`, httpPort, httpsPort)
cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", m.cfg.Name, "--config=-")
cmd.Stdin = strings.NewReader(config)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
// GetKubeconfig returns the kubeconfig for the cluster.
func (m *Manager) GetKubeconfig() (string, error) {
cmd := exec.Command("kind", "get", "kubeconfig", "--name", m.cfg.Name)
out, err := cmd.Output()
if err != nil {
return "", fmt.Errorf("getting kubeconfig: %w", err)
}
return string(out), nil
}
// Context returns the kubectl context name for this cluster.
func (m *Manager) Context() string {
return "kind-" + m.cfg.Name
}
// Name returns the cluster name.
func (m *Manager) Name() string {
return m.cfg.Name
}
// LoadImage loads a container image into the kind cluster.
func (m *Manager) LoadImage(ctx context.Context, image string) error {
if !m.imageExistsLocally(image) {
fmt.Printf(" Image not found locally, pulling: %s\n", image)
pullCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
pullCmd.Stdout = os.Stdout
pullCmd.Stderr = os.Stderr
if err := pullCmd.Run(); err != nil {
return fmt.Errorf("pulling image %s: %w", image, err)
}
} else {
fmt.Printf(" Image found locally: %s\n", image)
}
fmt.Printf(" Copying image to kind cluster...\n")
if m.cfg.ContainerRuntime == "podman" {
tmpFile := fmt.Sprintf("/tmp/kind-image-%d.tar", time.Now().UnixNano())
defer os.Remove(tmpFile)
saveCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "save", image, "-o", tmpFile)
if err := saveCmd.Run(); err != nil {
return fmt.Errorf("saving image %s: %w", image, err)
}
loadCmd := exec.CommandContext(ctx, "kind", "load", "image-archive", tmpFile, "--name", m.cfg.Name)
loadCmd.Stdout = os.Stdout
loadCmd.Stderr = os.Stderr
if err := loadCmd.Run(); err != nil {
return fmt.Errorf("loading image archive: %w", err)
}
} else {
loadCmd := exec.CommandContext(ctx, "kind", "load", "docker-image", image, "--name", m.cfg.Name)
loadCmd.Stdout = os.Stdout
loadCmd.Stderr = os.Stderr
if err := loadCmd.Run(); err != nil {
return fmt.Errorf("loading image %s: %w", image, err)
}
}
return nil
}
// imageExistsLocally checks if an image exists in the local container runtime.
func (m *Manager) imageExistsLocally(image string) bool {
cmd := exec.Command(m.cfg.ContainerRuntime, "image", "exists", image)
if err := cmd.Run(); err == nil {
return true
}
cmd = exec.Command(m.cfg.ContainerRuntime, "image", "inspect", image)
if err := cmd.Run(); err == nil {
return true
}
cmd = exec.Command(m.cfg.ContainerRuntime, "images", "--format", "{{.Repository}}:{{.Tag}}")
out, err := cmd.Output()
if err == nil {
for _, line := range strings.Split(string(out), "\n") {
if strings.TrimSpace(line) == image {
return true
}
}
}
return false
}
// PullImage pulls an image using the container runtime.
func (m *Manager) PullImage(ctx context.Context, image string) error {
cmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd.Run()
}
// ExecKubectl runs a kubectl command against the cluster.
func (m *Manager) ExecKubectl(ctx context.Context, args ...string) ([]byte, error) {
cmd := exec.CommandContext(ctx, "kubectl", args...)
var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
return nil, fmt.Errorf("%w: %s", err, stderr.String())
}
return stdout.Bytes(), nil
}
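// Illustrative usage (not part of this file): a caller typically detects the
// container runtime, provisions the cluster, and then loads the image under test:
//
//	runtime, _ := cluster.DetectContainerRuntime()
//	mgr := cluster.NewManager(cluster.Config{Name: "reloader-loadtest-0", ContainerRuntime: runtime})
//	_ = mgr.Create(ctx)
//	_ = mgr.LoadImage(ctx, "localhost/reloader:dev")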


@@ -0,0 +1,860 @@
package cmd
import (
"encoding/json"
"fmt"
"log"
"math"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/spf13/cobra"
)
var (
reportScenario string
reportResultsDir string
reportOutputFile string
reportFormat string
)
var reportCmd = &cobra.Command{
Use: "report",
Short: "Generate comparison report for a scenario",
Long: `Generate a detailed report for a specific test scenario.
Examples:
# Generate report for a scenario
loadtest report --scenario=S2 --results-dir=./results
# Generate JSON report
loadtest report --scenario=S2 --format=json`,
Run: func(cmd *cobra.Command, args []string) {
reportCommand()
},
}
func init() {
reportCmd.Flags().StringVar(&reportScenario, "scenario", "", "Scenario to report on (required)")
reportCmd.Flags().StringVar(&reportResultsDir, "results-dir", "./results", "Directory containing results")
reportCmd.Flags().StringVar(&reportOutputFile, "output", "", "Output file (default: stdout)")
reportCmd.Flags().StringVar(&reportFormat, "format", "text", "Output format: text, json, markdown")
reportCmd.MarkFlagRequired("scenario")
}
// PrometheusResponse represents a Prometheus API response for report parsing.
type PrometheusResponse struct {
Status string `json:"status"`
Data struct {
ResultType string `json:"resultType"`
Result []struct {
Metric map[string]string `json:"metric"`
Value []interface{} `json:"value"`
} `json:"result"`
} `json:"data"`
}
// MetricComparison represents the comparison of a single metric.
type MetricComparison struct {
Name string `json:"name"`
DisplayName string `json:"display_name"`
Unit string `json:"unit"`
IsCounter bool `json:"is_counter"`
OldValue float64 `json:"old_value"`
NewValue float64 `json:"new_value"`
Expected float64 `json:"expected"`
Difference float64 `json:"difference"`
DiffPct float64 `json:"diff_pct"`
Status string `json:"status"`
Threshold float64 `json:"threshold"`
OldMeetsExpected string `json:"old_meets_expected"`
NewMeetsExpected string `json:"new_meets_expected"`
}
type metricInfo struct {
unit string
isCounter bool
}
var metricInfoMap = map[string]metricInfo{
"reconcile_total": {unit: "count", isCounter: true},
"reconcile_duration_p50": {unit: "s", isCounter: false},
"reconcile_duration_p95": {unit: "s", isCounter: false},
"reconcile_duration_p99": {unit: "s", isCounter: false},
"action_total": {unit: "count", isCounter: true},
"action_latency_p50": {unit: "s", isCounter: false},
"action_latency_p95": {unit: "s", isCounter: false},
"action_latency_p99": {unit: "s", isCounter: false},
"errors_total": {unit: "count", isCounter: true},
"reload_executed_total": {unit: "count", isCounter: true},
"workloads_scanned_total": {unit: "count", isCounter: true},
"workloads_matched_total": {unit: "count", isCounter: true},
"skipped_total_no_data_change": {unit: "count", isCounter: true},
"rest_client_requests_total": {unit: "count", isCounter: true},
"rest_client_requests_get": {unit: "count", isCounter: true},
"rest_client_requests_patch": {unit: "count", isCounter: true},
"rest_client_requests_put": {unit: "count", isCounter: true},
"rest_client_requests_errors": {unit: "count", isCounter: true},
"memory_rss_mb_avg": {unit: "MB", isCounter: false},
"memory_rss_mb_max": {unit: "MB", isCounter: false},
"memory_heap_mb_avg": {unit: "MB", isCounter: false},
"memory_heap_mb_max": {unit: "MB", isCounter: false},
"cpu_cores_avg": {unit: "cores", isCounter: false},
"cpu_cores_max": {unit: "cores", isCounter: false},
"goroutines_avg": {unit: "count", isCounter: false},
"goroutines_max": {unit: "count", isCounter: false},
"gc_pause_p99_ms": {unit: "ms", isCounter: false},
}
// ReportExpectedMetrics matches the expected metrics from test scenarios.
type ReportExpectedMetrics struct {
ActionTotal int `json:"action_total"`
ReloadExecutedTotal int `json:"reload_executed_total"`
ReconcileTotal int `json:"reconcile_total"`
WorkloadsScannedTotal int `json:"workloads_scanned_total"`
WorkloadsMatchedTotal int `json:"workloads_matched_total"`
SkippedTotal int `json:"skipped_total"`
Description string `json:"description"`
}
// ScenarioReport represents the full report for a scenario.
type ScenarioReport struct {
Scenario string `json:"scenario"`
Timestamp time.Time `json:"timestamp"`
Comparisons []MetricComparison `json:"comparisons"`
OverallStatus string `json:"overall_status"`
Summary string `json:"summary"`
PassCriteria []string `json:"pass_criteria"`
FailedCriteria []string `json:"failed_criteria"`
Expected ReportExpectedMetrics `json:"expected"`
TestDescription string `json:"test_description"`
}
// MetricType defines how to evaluate a metric.
type MetricType int
const (
LowerIsBetter MetricType = iota
ShouldMatch
HigherIsBetter
Informational
)
type thresholdConfig struct {
maxDiff float64
metricType MetricType
minAbsDiff float64
}
var thresholds = map[string]thresholdConfig{
"reconcile_total": {maxDiff: 60.0, metricType: LowerIsBetter},
"reconcile_duration_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5},
"reconcile_duration_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
"reconcile_duration_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
"action_latency_p50": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.5},
"action_latency_p95": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
"action_latency_p99": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 1.0},
"errors_total": {maxDiff: 0.0, metricType: LowerIsBetter},
"action_total": {maxDiff: 15.0, metricType: ShouldMatch},
"reload_executed_total": {maxDiff: 15.0, metricType: ShouldMatch},
"workloads_scanned_total": {maxDiff: 15.0, metricType: ShouldMatch},
"workloads_matched_total": {maxDiff: 15.0, metricType: ShouldMatch},
"skipped_total_no_data_change": {maxDiff: 20.0, metricType: ShouldMatch},
"rest_client_requests_total": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
"rest_client_requests_get": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
"rest_client_requests_patch": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 50},
"rest_client_requests_put": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 20},
"rest_client_requests_errors": {maxDiff: 0.0, metricType: LowerIsBetter, minAbsDiff: 100},
"memory_rss_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20},
"memory_rss_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 30},
"memory_heap_mb_avg": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 15},
"memory_heap_mb_max": {maxDiff: 50.0, metricType: LowerIsBetter, minAbsDiff: 20},
"cpu_cores_avg": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.1},
"cpu_cores_max": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 0.2},
"goroutines_avg": {metricType: Informational},
"goroutines_max": {metricType: Informational},
"gc_pause_p99_ms": {maxDiff: 100.0, metricType: LowerIsBetter, minAbsDiff: 5},
}
func reportCommand() {
if reportScenario == "" {
log.Fatal("--scenario is required for report command")
}
report, err := generateScenarioReport(reportScenario, reportResultsDir)
if err != nil {
log.Fatalf("Failed to generate report: %v", err)
}
var output string
switch OutputFormat(reportFormat) {
case OutputFormatJSON:
output = renderScenarioReportJSON(report)
case OutputFormatMarkdown:
output = renderScenarioReportMarkdown(report)
default:
output = renderScenarioReport(report)
}
if reportOutputFile != "" {
if err := os.WriteFile(reportOutputFile, []byte(output), 0644); err != nil {
log.Fatalf("Failed to write output file: %v", err)
}
log.Printf("Report written to %s", reportOutputFile)
} else {
fmt.Println(output)
}
}
func generateScenarioReport(scenario, resultsDir string) (*ScenarioReport, error) {
oldDir := filepath.Join(resultsDir, scenario, "old")
newDir := filepath.Join(resultsDir, scenario, "new")
scenarioDir := filepath.Join(resultsDir, scenario)
_, oldErr := os.Stat(oldDir)
_, newErr := os.Stat(newDir)
hasOld := oldErr == nil
hasNew := newErr == nil
isComparison := hasOld && hasNew
singleVersion := ""
singleDir := ""
if !isComparison {
if hasNew {
singleVersion = "new"
singleDir = newDir
} else if hasOld {
singleVersion = "old"
singleDir = oldDir
} else {
return nil, fmt.Errorf("no results found in %s", scenarioDir)
}
}
report := &ScenarioReport{
Scenario: scenario,
Timestamp: time.Now(),
}
expectedPath := filepath.Join(scenarioDir, "expected.json")
if data, err := os.ReadFile(expectedPath); err == nil {
if err := json.Unmarshal(data, &report.Expected); err != nil {
log.Printf("Warning: Could not parse expected metrics: %v", err)
} else {
report.TestDescription = report.Expected.Description
}
}
if !isComparison {
return generateSingleVersionReport(report, singleDir, singleVersion, scenario)
}
metricsToCompare := []struct {
name string
file string
selector func(data PrometheusResponse) float64
}{
{"reconcile_total", "reloader_reconcile_total.json", sumAllValues},
{"reconcile_duration_p50", "reconcile_p50.json", getFirstValue},
{"reconcile_duration_p95", "reconcile_p95.json", getFirstValue},
{"reconcile_duration_p99", "reconcile_p99.json", getFirstValue},
{"action_total", "reloader_action_total.json", sumAllValues},
{"action_latency_p50", "action_p50.json", getFirstValue},
{"action_latency_p95", "action_p95.json", getFirstValue},
{"action_latency_p99", "action_p99.json", getFirstValue},
{"errors_total", "reloader_errors_total.json", sumAllValues},
{"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues},
{"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues},
{"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues},
{"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue},
{"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue},
{"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue},
{"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue},
{"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue},
{"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB},
{"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB},
{"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB},
{"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB},
{"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue},
{"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue},
{"goroutines_avg", "goroutines_avg.json", getFirstValue},
{"goroutines_max", "goroutines_max.json", getFirstValue},
{"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs},
}
expectedValues := map[string]float64{
"action_total": float64(report.Expected.ActionTotal),
"reload_executed_total": float64(report.Expected.ReloadExecutedTotal),
"reconcile_total": float64(report.Expected.ReconcileTotal),
"workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal),
"workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal),
"skipped_total": float64(report.Expected.SkippedTotal),
}
metricValues := make(map[string]struct{ old, new, expected float64 })
for _, m := range metricsToCompare {
oldData, err := loadMetricFile(filepath.Join(oldDir, m.file))
if err != nil {
log.Printf("Warning: Could not load old metric %s: %v", m.name, err)
continue
}
newData, err := loadMetricFile(filepath.Join(newDir, m.file))
if err != nil {
log.Printf("Warning: Could not load new metric %s: %v", m.name, err)
continue
}
oldValue := m.selector(oldData)
newValue := m.selector(newData)
expected := expectedValues[m.name]
metricValues[m.name] = struct{ old, new, expected float64 }{oldValue, newValue, expected}
}
newMeetsActionExpected := false
newReconcileIsZero := false
isChurnScenario := scenario == "S5"
if v, ok := metricValues["action_total"]; ok && v.expected > 0 {
tolerance := v.expected * 0.15
newMeetsActionExpected = math.Abs(v.new-v.expected) <= tolerance
}
if v, ok := metricValues["reconcile_total"]; ok {
newReconcileIsZero = v.new == 0
}
for _, m := range metricsToCompare {
v, ok := metricValues[m.name]
if !ok {
continue
}
comparison := compareMetricWithExpected(m.name, v.old, v.new, v.expected)
if strings.HasPrefix(m.name, "rest_client_requests") {
if newMeetsActionExpected && comparison.Status != "pass" {
if oldMeets, ok := metricValues["action_total"]; ok {
oldTolerance := oldMeets.expected * 0.15
oldMissed := math.Abs(oldMeets.old-oldMeets.expected) > oldTolerance
if oldMissed {
comparison.Status = "pass"
}
}
}
if newReconcileIsZero && comparison.Status != "pass" {
comparison.Status = "pass"
}
}
if isChurnScenario {
if m.name == "errors_total" {
if v.new < 50 && v.old < 50 {
comparison.Status = "pass"
} else if v.new <= v.old*1.5 {
comparison.Status = "pass"
}
}
if m.name == "action_total" || m.name == "reload_executed_total" {
if v.old > 0 {
diff := math.Abs(v.new-v.old) / v.old * 100
if diff <= 20 {
comparison.Status = "pass"
}
} else if v.new > 0 {
comparison.Status = "pass"
}
}
}
report.Comparisons = append(report.Comparisons, comparison)
if comparison.Status == "pass" {
report.PassCriteria = append(report.PassCriteria, m.name)
} else if comparison.Status == "fail" {
report.FailedCriteria = append(report.FailedCriteria, m.name)
}
}
if len(report.FailedCriteria) == 0 {
report.OverallStatus = "PASS"
report.Summary = "All metrics within acceptable thresholds"
} else {
report.OverallStatus = "FAIL"
report.Summary = fmt.Sprintf("%d metrics failed: %s",
len(report.FailedCriteria),
strings.Join(report.FailedCriteria, ", "))
}
return report, nil
}
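// generateSingleVersionReport builds a report for a single Reloader version,
// comparing the collected metrics against expected values only.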
func generateSingleVersionReport(report *ScenarioReport, dataDir, version, scenario string) (*ScenarioReport, error) {
metricsToCollect := []struct {
name string
file string
selector func(data PrometheusResponse) float64
}{
{"reconcile_total", "reloader_reconcile_total.json", sumAllValues},
{"reconcile_duration_p50", "reconcile_p50.json", getFirstValue},
{"reconcile_duration_p95", "reconcile_p95.json", getFirstValue},
{"reconcile_duration_p99", "reconcile_p99.json", getFirstValue},
{"action_total", "reloader_action_total.json", sumAllValues},
{"action_latency_p50", "action_p50.json", getFirstValue},
{"action_latency_p95", "action_p95.json", getFirstValue},
{"action_latency_p99", "action_p99.json", getFirstValue},
{"errors_total", "reloader_errors_total.json", sumAllValues},
{"reload_executed_total", "reloader_reload_executed_total.json", sumSuccessValues},
{"workloads_scanned_total", "reloader_workloads_scanned_total.json", sumAllValues},
{"workloads_matched_total", "reloader_workloads_matched_total.json", sumAllValues},
{"rest_client_requests_total", "rest_client_requests_total.json", getFirstValue},
{"rest_client_requests_get", "rest_client_requests_get.json", getFirstValue},
{"rest_client_requests_patch", "rest_client_requests_patch.json", getFirstValue},
{"rest_client_requests_put", "rest_client_requests_put.json", getFirstValue},
{"rest_client_requests_errors", "rest_client_requests_errors.json", getFirstValue},
{"memory_rss_mb_avg", "memory_rss_bytes_avg.json", bytesToMB},
{"memory_rss_mb_max", "memory_rss_bytes_max.json", bytesToMB},
{"memory_heap_mb_avg", "memory_heap_bytes_avg.json", bytesToMB},
{"memory_heap_mb_max", "memory_heap_bytes_max.json", bytesToMB},
{"cpu_cores_avg", "cpu_usage_cores_avg.json", getFirstValue},
{"cpu_cores_max", "cpu_usage_cores_max.json", getFirstValue},
{"goroutines_avg", "goroutines_avg.json", getFirstValue},
{"goroutines_max", "goroutines_max.json", getFirstValue},
{"gc_pause_p99_ms", "gc_duration_seconds_p99.json", secondsToMs},
}
expectedValues := map[string]float64{
"action_total": float64(report.Expected.ActionTotal),
"reload_executed_total": float64(report.Expected.ReloadExecutedTotal),
"reconcile_total": float64(report.Expected.ReconcileTotal),
"workloads_scanned_total": float64(report.Expected.WorkloadsScannedTotal),
"workloads_matched_total": float64(report.Expected.WorkloadsMatchedTotal),
"skipped_total": float64(report.Expected.SkippedTotal),
}
for _, m := range metricsToCollect {
data, err := loadMetricFile(filepath.Join(dataDir, m.file))
if err != nil {
log.Printf("Warning: Could not load metric %s: %v", m.name, err)
continue
}
value := m.selector(data)
expected := expectedValues[m.name]
info := metricInfoMap[m.name]
if info.unit == "" {
info = metricInfo{unit: "count", isCounter: true}
}
displayName := m.name
if info.unit != "count" {
displayName = fmt.Sprintf("%s (%s)", m.name, info.unit)
}
status := "info"
meetsExp := "-"
if expected > 0 {
meetsExp = meetsExpected(value, expected)
threshold, ok := thresholds[m.name]
if ok && threshold.metricType == ShouldMatch {
if meetsExp == "✓" {
status = "pass"
report.PassCriteria = append(report.PassCriteria, m.name)
} else {
status = "fail"
report.FailedCriteria = append(report.FailedCriteria, m.name)
}
}
}
if info.isCounter {
value = math.Round(value)
}
report.Comparisons = append(report.Comparisons, MetricComparison{
Name: m.name,
DisplayName: displayName,
Unit: info.unit,
IsCounter: info.isCounter,
OldValue: 0,
NewValue: value,
Expected: expected,
OldMeetsExpected: "-",
NewMeetsExpected: meetsExp,
Status: status,
})
}
if len(report.FailedCriteria) == 0 {
report.OverallStatus = "PASS"
report.Summary = fmt.Sprintf("Single-version test (%s) completed successfully", version)
} else {
report.OverallStatus = "FAIL"
report.Summary = fmt.Sprintf("%d metrics failed: %s",
len(report.FailedCriteria),
strings.Join(report.FailedCriteria, ", "))
}
return report, nil
}
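// loadMetricFile reads and unmarshals a Prometheus query result JSON file.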
func loadMetricFile(path string) (PrometheusResponse, error) {
var resp PrometheusResponse
data, err := os.ReadFile(path)
if err != nil {
return resp, err
}
err = json.Unmarshal(data, &resp)
return resp, err
}
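// sumAllValues sums the values of every series in a Prometheus vector result.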
func sumAllValues(data PrometheusResponse) float64 {
var sum float64
for _, result := range data.Data.Result {
if len(result.Value) >= 2 {
if v, ok := result.Value[1].(string); ok {
var f float64
fmt.Sscanf(v, "%f", &f)
sum += f
}
}
}
return sum
}
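// sumSuccessValues sums only the series labeled success="true".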
func sumSuccessValues(data PrometheusResponse) float64 {
var sum float64
for _, result := range data.Data.Result {
if result.Metric["success"] == "true" {
if len(result.Value) >= 2 {
if v, ok := result.Value[1].(string); ok {
var f float64
fmt.Sscanf(v, "%f", &f)
sum += f
}
}
}
}
return sum
}
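// getFirstValue returns the value of the first series in the result, or 0 if there is none.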
func getFirstValue(data PrometheusResponse) float64 {
if len(data.Data.Result) > 0 && len(data.Data.Result[0].Value) >= 2 {
if v, ok := data.Data.Result[0].Value[1].(string); ok {
var f float64
fmt.Sscanf(v, "%f", &f)
return f
}
}
return 0
}
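// bytesToMB converts the first result value from bytes to megabytes (1 MB = 1024*1024 bytes).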
func bytesToMB(data PrometheusResponse) float64 {
bytes := getFirstValue(data)
return bytes / (1024 * 1024)
}
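// secondsToMs converts the first result value from seconds to milliseconds.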
func secondsToMs(data PrometheusResponse) float64 {
seconds := getFirstValue(data)
return seconds * 1000
}
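// meetsExpected returns "✓" when value is within 15% of expected, "✗" otherwise,
// and "-" when no expectation is set.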
func meetsExpected(value, expected float64) string {
if expected == 0 {
return "-"
}
tolerance := expected * 0.15
if math.Abs(value-expected) <= tolerance {
return "✓"
}
return "✗"
}
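// compareMetricWithExpected builds a MetricComparison for old vs. new values and
// applies the configured threshold to derive a pass/fail/info status.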
func compareMetricWithExpected(name string, oldValue, newValue, expected float64) MetricComparison {
diff := newValue - oldValue
absDiff := math.Abs(diff)
var diffPct float64
if oldValue != 0 {
diffPct = (diff / oldValue) * 100
} else if newValue != 0 {
diffPct = 100
}
threshold, ok := thresholds[name]
if !ok {
threshold = thresholdConfig{maxDiff: 10.0, metricType: ShouldMatch}
}
info := metricInfoMap[name]
if info.unit == "" {
info = metricInfo{unit: "count", isCounter: true}
}
displayName := name
if info.unit != "count" {
displayName = fmt.Sprintf("%s (%s)", name, info.unit)
}
if info.isCounter {
oldValue = math.Round(oldValue)
newValue = math.Round(newValue)
}
status := "pass"
oldMeetsExp := meetsExpected(oldValue, expected)
newMeetsExp := meetsExpected(newValue, expected)
isNewMetric := info.isCounter && oldValue == 0 && newValue > 0 && expected == 0
if isNewMetric {
status = "info"
} else if expected > 0 && threshold.metricType == ShouldMatch {
if newMeetsExp == "✗" {
status = "fail"
}
} else {
switch threshold.metricType {
case LowerIsBetter:
if threshold.minAbsDiff > 0 && absDiff < threshold.minAbsDiff {
status = "pass"
} else if diffPct > threshold.maxDiff {
status = "fail"
}
case HigherIsBetter:
if diffPct < -threshold.maxDiff {
status = "fail"
}
case ShouldMatch:
if math.Abs(diffPct) > threshold.maxDiff {
status = "fail"
}
case Informational:
status = "info"
}
}
return MetricComparison{
Name: name,
DisplayName: displayName,
Unit: info.unit,
IsCounter: info.isCounter,
Expected: expected,
OldMeetsExpected: oldMeetsExp,
NewMeetsExpected: newMeetsExp,
OldValue: oldValue,
NewValue: newValue,
Difference: diff,
DiffPct: diffPct,
Status: status,
Threshold: threshold.maxDiff,
}
}
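// renderScenarioReport renders the report as a plain-text table, using the
// single-version layout when no old values are present.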
func renderScenarioReport(report *ScenarioReport) string {
var sb strings.Builder
isSingleVersion := true
for _, c := range report.Comparisons {
if c.OldValue != 0 {
isSingleVersion = false
break
}
}
sb.WriteString("\n")
sb.WriteString("================================================================================\n")
if isSingleVersion {
sb.WriteString(" RELOADER TEST REPORT\n")
} else {
sb.WriteString(" RELOADER A/B COMPARISON REPORT\n")
}
sb.WriteString("================================================================================\n\n")
fmt.Fprintf(&sb, "Scenario: %s\n", report.Scenario)
fmt.Fprintf(&sb, "Generated: %s\n", report.Timestamp.Format("2006-01-02 15:04:05"))
fmt.Fprintf(&sb, "Status: %s\n", report.OverallStatus)
fmt.Fprintf(&sb, "Summary: %s\n", report.Summary)
if report.TestDescription != "" {
fmt.Fprintf(&sb, "Test: %s\n", report.TestDescription)
}
if report.Expected.ActionTotal > 0 {
sb.WriteString("\n--------------------------------------------------------------------------------\n")
sb.WriteString(" EXPECTED VALUES\n")
sb.WriteString("--------------------------------------------------------------------------------\n")
fmt.Fprintf(&sb, "Expected Action Total: %d\n", report.Expected.ActionTotal)
fmt.Fprintf(&sb, "Expected Reload Executed Total: %d\n", report.Expected.ReloadExecutedTotal)
if report.Expected.SkippedTotal > 0 {
fmt.Fprintf(&sb, "Expected Skipped Total: %d\n", report.Expected.SkippedTotal)
}
}
sb.WriteString("\n--------------------------------------------------------------------------------\n")
if isSingleVersion {
sb.WriteString(" METRICS\n")
} else {
sb.WriteString(" METRIC COMPARISONS\n")
}
sb.WriteString("--------------------------------------------------------------------------------\n")
if isSingleVersion {
sb.WriteString("(✓ = meets expected value within 15%)\n\n")
fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n",
"Metric", "Value", "Expected", "Met?", "Status")
fmt.Fprintf(&sb, "%-32s %12s %10s %5s %8s\n",
"------", "-----", "--------", "----", "------")
for _, c := range report.Comparisons {
if c.IsCounter {
if c.Expected > 0 {
fmt.Fprintf(&sb, "%-32s %12.0f %10.0f %5s %8s\n",
c.DisplayName, c.NewValue, c.Expected,
c.NewMeetsExpected, c.Status)
} else {
fmt.Fprintf(&sb, "%-32s %12.0f %10s %5s %8s\n",
c.DisplayName, c.NewValue, "-",
c.NewMeetsExpected, c.Status)
}
} else {
fmt.Fprintf(&sb, "%-32s %12.4f %10s %5s %8s\n",
c.DisplayName, c.NewValue, "-",
c.NewMeetsExpected, c.Status)
}
}
} else {
sb.WriteString("(Old✓/New✓ = meets expected value within 15%)\n\n")
fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n",
"Metric", "Old", "New", "Expected", "Old✓", "New✓", "Status")
fmt.Fprintf(&sb, "%-32s %12s %12s %10s %5s %5s %8s\n",
"------", "---", "---", "--------", "----", "----", "------")
for _, c := range report.Comparisons {
if c.IsCounter {
if c.Expected > 0 {
fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10.0f %5s %5s %8s\n",
c.DisplayName, c.OldValue, c.NewValue, c.Expected,
c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
} else {
fmt.Fprintf(&sb, "%-32s %12.0f %12.0f %10s %5s %5s %8s\n",
c.DisplayName, c.OldValue, c.NewValue, "-",
c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
}
} else {
fmt.Fprintf(&sb, "%-32s %12.4f %12.4f %10s %5s %5s %8s\n",
c.DisplayName, c.OldValue, c.NewValue, "-",
c.OldMeetsExpected, c.NewMeetsExpected, c.Status)
}
}
}
sb.WriteString("\n--------------------------------------------------------------------------------\n")
sb.WriteString(" PASS/FAIL CRITERIA\n")
sb.WriteString("--------------------------------------------------------------------------------\n\n")
fmt.Fprintf(&sb, "Passed (%d):\n", len(report.PassCriteria))
for _, p := range report.PassCriteria {
fmt.Fprintf(&sb, " ✓ %s\n", p)
}
if len(report.FailedCriteria) > 0 {
fmt.Fprintf(&sb, "\nFailed (%d):\n", len(report.FailedCriteria))
for _, f := range report.FailedCriteria {
fmt.Fprintf(&sb, " ✗ %s\n", f)
}
}
sb.WriteString("\n--------------------------------------------------------------------------------\n")
sb.WriteString(" THRESHOLDS USED\n")
sb.WriteString("--------------------------------------------------------------------------------\n\n")
fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n",
"Metric", "Max Diff%", "Min Abs Diff", "Direction")
fmt.Fprintf(&sb, "%-35s %10s %15s %18s\n",
"------", "---------", "------------", "---------")
var names []string
for name := range thresholds {
names = append(names, name)
}
sort.Strings(names)
for _, name := range names {
t := thresholds[name]
var direction string
switch t.metricType {
case LowerIsBetter:
direction = "lower is better"
case HigherIsBetter:
direction = "higher is better"
case ShouldMatch:
direction = "should match"
case Informational:
direction = "info only"
}
minAbsDiff := "-"
if t.minAbsDiff > 0 {
minAbsDiff = fmt.Sprintf("%.1f", t.minAbsDiff)
}
fmt.Fprintf(&sb, "%-35s %9.1f%% %15s %18s\n",
name, t.maxDiff, minAbsDiff, direction)
}
sb.WriteString("\n================================================================================\n")
return sb.String()
}
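// renderScenarioReportJSON renders the report as indented JSON.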
func renderScenarioReportJSON(report *ScenarioReport) string {
data, err := json.MarshalIndent(report, "", " ")
if err != nil {
return fmt.Sprintf(`{"error": "%s"}`, err.Error())
}
return string(data)
}
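// renderScenarioReportMarkdown renders a compact markdown summary of the key metrics.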
func renderScenarioReportMarkdown(report *ScenarioReport) string {
var sb strings.Builder
emoji := "✅"
if report.OverallStatus != "PASS" {
emoji = "❌"
}
sb.WriteString(fmt.Sprintf("## %s %s: %s\n\n", emoji, report.Scenario, report.OverallStatus))
if report.TestDescription != "" {
sb.WriteString(fmt.Sprintf("> %s\n\n", report.TestDescription))
}
sb.WriteString("| Metric | Value | Expected | Status |\n")
sb.WriteString("|--------|------:|:--------:|:------:|\n")
keyMetrics := []string{"action_total", "reload_executed_total", "errors_total", "reconcile_total"}
for _, name := range keyMetrics {
for _, c := range report.Comparisons {
if c.Name == name {
value := fmt.Sprintf("%.0f", c.NewValue)
expected := "-"
if c.Expected > 0 {
expected = fmt.Sprintf("%.0f", c.Expected)
}
status := "✅"
if c.Status == "fail" {
status = "❌"
} else if c.Status == "info" {
status = ""
}
sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s |\n", c.DisplayName, value, expected, status))
break
}
}
}
return sb.String()
}


@@ -0,0 +1,43 @@
package cmd
import (
"os"
"github.com/spf13/cobra"
)
const (
// DefaultClusterName is the default kind cluster name.
DefaultClusterName = "reloader-loadtest"
// TestNamespace is the namespace used for test resources.
TestNamespace = "reloader-test"
)
// OutputFormat defines the output format for reports.
type OutputFormat string
const (
OutputFormatText OutputFormat = "text"
OutputFormatJSON OutputFormat = "json"
OutputFormatMarkdown OutputFormat = "markdown"
)
// rootCmd is the base command.
var rootCmd = &cobra.Command{
Use: "loadtest",
Short: "Reloader Load Test CLI",
Long: `A CLI tool for running A/B comparison load tests on Reloader.`,
}
func init() {
rootCmd.AddCommand(runCmd)
rootCmd.AddCommand(reportCmd)
rootCmd.AddCommand(summaryCmd)
}
// Execute runs the root command.
func Execute() {
if err := rootCmd.Execute(); err != nil {
os.Exit(1)
}
}


@@ -0,0 +1,648 @@
package cmd
import (
"context"
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
"github.com/spf13/cobra"
"github.com/stakater/Reloader/test/loadtest/internal/cluster"
"github.com/stakater/Reloader/test/loadtest/internal/prometheus"
"github.com/stakater/Reloader/test/loadtest/internal/reloader"
"github.com/stakater/Reloader/test/loadtest/internal/scenarios"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)
// RunConfig holds CLI configuration for the run command.
type RunConfig struct {
OldImage string
NewImage string
Scenario string
Duration int
SkipCluster bool
SkipImageLoad bool
ClusterName string
ResultsDir string
ManifestsDir string
Parallelism int
}
// workerContext holds all resources for a single worker (cluster + prometheus).
type workerContext struct {
id int
clusterMgr *cluster.Manager
promMgr *prometheus.Manager
kubeClient kubernetes.Interface
kubeContext string
runtime string
}
var runCfg RunConfig
var runCmd = &cobra.Command{
Use: "run",
Short: "Run A/B comparison tests",
Long: `Run load tests comparing old and new versions of Reloader.
Examples:
# Compare two images
loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=stakater/reloader:v1.1.0
# Run specific scenario
loadtest run --old-image=stakater/reloader:v1.0.0 --new-image=localhost/reloader:dev --scenario=S2
# Test single image (no comparison)
loadtest run --new-image=localhost/reloader:test
# Run all scenarios in parallel on 4 clusters
loadtest run --new-image=localhost/reloader:test --parallelism=4`,
Run: func(cmd *cobra.Command, args []string) {
runCommand()
},
}
func init() {
runCmd.Flags().StringVar(&runCfg.OldImage, "old-image", "", "Container image for \"old\" version (required for comparison)")
runCmd.Flags().StringVar(&runCfg.NewImage, "new-image", "", "Container image for \"new\" version (required for comparison)")
runCmd.Flags().StringVar(&runCfg.Scenario, "scenario", "all", "Test scenario: S1-S13, a comma-separated list (e.g. \"S1,S4,S6\"), or \"all\"")
runCmd.Flags().IntVar(&runCfg.Duration, "duration", 60, "Test duration in seconds")
runCmd.Flags().IntVar(&runCfg.Parallelism, "parallelism", 1, "Run N scenarios in parallel on N clusters")
runCmd.Flags().BoolVar(&runCfg.SkipCluster, "skip-cluster", false, "Skip kind cluster creation (use existing)")
runCmd.Flags().BoolVar(&runCfg.SkipImageLoad, "skip-image-load", false, "Skip loading images into kind (use when images already loaded)")
runCmd.Flags().StringVar(&runCfg.ClusterName, "cluster-name", DefaultClusterName, "Kind cluster name")
runCmd.Flags().StringVar(&runCfg.ResultsDir, "results-dir", "./results", "Directory for results")
runCmd.Flags().StringVar(&runCfg.ManifestsDir, "manifests-dir", "", "Directory containing manifests (auto-detected if not set)")
}
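// runCommand validates the CLI configuration, detects the container runtime,
// and dispatches to sequential or parallel execution.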
func runCommand() {
if runCfg.ManifestsDir == "" {
execPath, _ := os.Executable()
execDir := filepath.Dir(execPath)
runCfg.ManifestsDir = filepath.Join(execDir, "..", "..", "manifests")
if _, err := os.Stat(runCfg.ManifestsDir); os.IsNotExist(err) {
runCfg.ManifestsDir = "./manifests"
}
}
if runCfg.Parallelism < 1 {
runCfg.Parallelism = 1
}
if runCfg.OldImage == "" && runCfg.NewImage == "" {
log.Fatal("At least one of --old-image or --new-image is required")
}
runOld := runCfg.OldImage != ""
runNew := runCfg.NewImage != ""
runBoth := runOld && runNew
log.Printf("Configuration:")
log.Printf(" Scenario: %s", runCfg.Scenario)
log.Printf(" Duration: %ds", runCfg.Duration)
log.Printf(" Parallelism: %d", runCfg.Parallelism)
if runCfg.OldImage != "" {
log.Printf(" Old image: %s", runCfg.OldImage)
}
if runCfg.NewImage != "" {
log.Printf(" New image: %s", runCfg.NewImage)
}
runtime, err := cluster.DetectContainerRuntime()
if err != nil {
log.Fatalf("Failed to detect container runtime: %v", err)
}
log.Printf(" Container runtime: %s", runtime)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-sigCh
log.Println("Received shutdown signal...")
cancel()
}()
var scenariosToRun []string
if runCfg.Scenario == "all" {
scenariosToRun = []string{"S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S11", "S12", "S13"}
} else {
// Split comma-separated scenarios (e.g., "S1,S4,S6")
for _, s := range strings.Split(runCfg.Scenario, ",") {
if trimmed := strings.TrimSpace(s); trimmed != "" {
scenariosToRun = append(scenariosToRun, trimmed)
}
}
}
if runCfg.SkipCluster && runCfg.Parallelism > 1 {
log.Fatal("--skip-cluster is not supported with --parallelism > 1")
}
if runCfg.Parallelism > 1 {
runParallel(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth)
return
}
runSequential(ctx, runCfg, scenariosToRun, runtime, runOld, runNew, runBoth)
}
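// runSequential runs the selected scenarios one after another on a single kind cluster.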
func runSequential(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) {
clusterMgr := cluster.NewManager(cluster.Config{
Name: cfg.ClusterName,
ContainerRuntime: runtime,
})
if cfg.SkipCluster {
log.Printf("Skipping cluster creation (using existing cluster: %s)", cfg.ClusterName)
if !clusterMgr.Exists() {
log.Fatalf("Cluster %s does not exist. Remove --skip-cluster to create it.", cfg.ClusterName)
}
} else {
log.Println("Creating kind cluster...")
if err := clusterMgr.Create(ctx); err != nil {
log.Fatalf("Failed to create cluster: %v", err)
}
}
promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml")
promMgr := prometheus.NewManager(promManifest)
log.Println("Installing Prometheus...")
if err := promMgr.Deploy(ctx); err != nil {
log.Fatalf("Failed to deploy Prometheus: %v", err)
}
if err := promMgr.StartPortForward(ctx); err != nil {
log.Fatalf("Failed to start Prometheus port-forward: %v", err)
}
defer promMgr.StopPortForward()
if cfg.SkipImageLoad {
log.Println("Skipping image loading (--skip-image-load)")
} else {
log.Println("Loading images into kind cluster...")
if runOld {
log.Printf("Loading old image: %s", cfg.OldImage)
if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil {
log.Fatalf("Failed to load old image: %v", err)
}
}
if runNew {
log.Printf("Loading new image: %s", cfg.NewImage)
if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil {
log.Fatalf("Failed to load new image: %v", err)
}
}
log.Println("Pre-loading test images...")
testImage := "gcr.io/google-containers/busybox:1.27"
if err := clusterMgr.LoadImage(ctx, testImage); err != nil {
log.Printf("Warning: failed to pre-load test image: %v", err)
}
}
kubeClient, err := getKubeClient("")
if err != nil {
log.Fatalf("Failed to create kubernetes client: %v", err)
}
for _, scenarioID := range scenariosToRun {
log.Printf("========================================")
log.Printf("=== Starting scenario %s ===", scenarioID)
log.Printf("========================================")
cleanupTestNamespaces(ctx, "")
reloader.CleanupByVersion(ctx, "old", "")
reloader.CleanupByVersion(ctx, "new", "")
if err := promMgr.Reset(ctx); err != nil {
log.Printf("Warning: failed to reset Prometheus: %v", err)
}
createTestNamespace(ctx, "")
if runOld {
oldMgr := reloader.NewManager(reloader.Config{
Version: "old",
Image: cfg.OldImage,
})
if err := oldMgr.Deploy(ctx); err != nil {
log.Printf("Failed to deploy old Reloader: %v", err)
continue
}
if err := promMgr.WaitForTarget(ctx, oldMgr.Job(), 60*time.Second); err != nil {
log.Printf("Warning: %v", err)
log.Println("Proceeding anyway, but metrics may be incomplete")
}
runScenario(ctx, kubeClient, scenarioID, "old", cfg.OldImage, cfg.Duration, cfg.ResultsDir)
collectMetrics(ctx, promMgr, oldMgr.Job(), scenarioID, "old", cfg.ResultsDir)
collectLogs(ctx, oldMgr, scenarioID, "old", cfg.ResultsDir)
if runBoth {
cleanupTestNamespaces(ctx, "")
oldMgr.Cleanup(ctx)
if err := promMgr.Reset(ctx); err != nil {
log.Printf("Warning: failed to reset Prometheus: %v", err)
}
createTestNamespace(ctx, "")
}
}
if runNew {
newMgr := reloader.NewManager(reloader.Config{
Version: "new",
Image: cfg.NewImage,
})
if err := newMgr.Deploy(ctx); err != nil {
log.Printf("Failed to deploy new Reloader: %v", err)
continue
}
if err := promMgr.WaitForTarget(ctx, newMgr.Job(), 60*time.Second); err != nil {
log.Printf("Warning: %v", err)
log.Println("Proceeding anyway, but metrics may be incomplete")
}
runScenario(ctx, kubeClient, scenarioID, "new", cfg.NewImage, cfg.Duration, cfg.ResultsDir)
collectMetrics(ctx, promMgr, newMgr.Job(), scenarioID, "new", cfg.ResultsDir)
collectLogs(ctx, newMgr, scenarioID, "new", cfg.ResultsDir)
}
generateReport(scenarioID, cfg.ResultsDir, runBoth)
log.Printf("=== Scenario %s complete ===", scenarioID)
}
log.Println("Load test complete!")
log.Printf("Results available in: %s", cfg.ResultsDir)
}
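// runParallel distributes the selected scenarios across multiple worker clusters.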
func runParallel(ctx context.Context, cfg RunConfig, scenariosToRun []string, runtime string, runOld, runNew, runBoth bool) {
numWorkers := cfg.Parallelism
if numWorkers > len(scenariosToRun) {
numWorkers = len(scenariosToRun)
log.Printf("Reducing parallelism to %d (number of scenarios)", numWorkers)
}
log.Printf("Starting parallel execution with %d workers", numWorkers)
workers := make([]*workerContext, numWorkers)
var setupWg sync.WaitGroup
setupErrors := make(chan error, numWorkers)
log.Println("Setting up worker clusters...")
for i := range numWorkers {
setupWg.Add(1)
go func(workerID int) {
defer setupWg.Done()
worker, err := setupWorker(ctx, cfg, workerID, runtime, runOld, runNew)
if err != nil {
setupErrors <- fmt.Errorf("worker %d setup failed: %w", workerID, err)
return
}
workers[workerID] = worker
}(i)
}
setupWg.Wait()
close(setupErrors)
for err := range setupErrors {
log.Printf("Error: %v", err)
}
readyWorkers := 0
for _, w := range workers {
if w != nil {
readyWorkers++
}
}
if readyWorkers == 0 {
log.Fatal("No workers ready, aborting")
}
if readyWorkers < numWorkers {
log.Printf("Warning: only %d/%d workers ready", readyWorkers, numWorkers)
}
defer func() {
log.Println("Cleaning up worker clusters...")
for _, w := range workers {
if w != nil {
w.promMgr.StopPortForward()
}
}
}()
scenarioCh := make(chan string, len(scenariosToRun))
for _, s := range scenariosToRun {
scenarioCh <- s
}
close(scenarioCh)
var resultsMu sync.Mutex
completedScenarios := make([]string, 0, len(scenariosToRun))
var wg sync.WaitGroup
for _, worker := range workers {
if worker == nil {
continue
}
wg.Add(1)
go func(w *workerContext) {
defer wg.Done()
for scenarioID := range scenarioCh {
select {
case <-ctx.Done():
return
default:
}
log.Printf("[Worker %d] Starting scenario %s", w.id, scenarioID)
cleanupTestNamespaces(ctx, w.kubeContext)
reloader.CleanupByVersion(ctx, "old", w.kubeContext)
reloader.CleanupByVersion(ctx, "new", w.kubeContext)
if err := w.promMgr.Reset(ctx); err != nil {
log.Printf("[Worker %d] Warning: failed to reset Prometheus: %v", w.id, err)
}
createTestNamespace(ctx, w.kubeContext)
if runOld {
runVersionOnWorker(ctx, w, cfg, scenarioID, "old", cfg.OldImage, runBoth)
}
if runNew {
runVersionOnWorker(ctx, w, cfg, scenarioID, "new", cfg.NewImage, false)
}
generateReport(scenarioID, cfg.ResultsDir, runBoth)
resultsMu.Lock()
completedScenarios = append(completedScenarios, scenarioID)
resultsMu.Unlock()
log.Printf("[Worker %d] Scenario %s complete", w.id, scenarioID)
}
}(worker)
}
wg.Wait()
log.Println("Load test complete!")
log.Printf("Completed %d/%d scenarios", len(completedScenarios), len(scenariosToRun))
log.Printf("Results available in: %s", cfg.ResultsDir)
}
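// setupWorker creates a kind cluster, deploys Prometheus, and loads images for one worker.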
func setupWorker(ctx context.Context, cfg RunConfig, workerID int, runtime string, runOld, runNew bool) (*workerContext, error) {
workerName := fmt.Sprintf("%s-%d", DefaultClusterName, workerID)
promPort := 9091 + workerID
log.Printf("[Worker %d] Creating cluster %s (ports %d/%d)...", workerID, workerName, 8080+workerID, 8443+workerID)
clusterMgr := cluster.NewManager(cluster.Config{
Name: workerName,
ContainerRuntime: runtime,
PortOffset: workerID,
})
if err := clusterMgr.Create(ctx); err != nil {
return nil, fmt.Errorf("creating cluster: %w", err)
}
kubeContext := clusterMgr.Context()
promManifest := filepath.Join(cfg.ManifestsDir, "prometheus.yaml")
promMgr := prometheus.NewManagerWithPort(promManifest, promPort, kubeContext)
log.Printf("[Worker %d] Installing Prometheus (port %d)...", workerID, promPort)
if err := promMgr.Deploy(ctx); err != nil {
return nil, fmt.Errorf("deploying prometheus: %w", err)
}
if err := promMgr.StartPortForward(ctx); err != nil {
return nil, fmt.Errorf("starting prometheus port-forward: %w", err)
}
if cfg.SkipImageLoad {
log.Printf("[Worker %d] Skipping image loading (--skip-image-load)", workerID)
} else {
log.Printf("[Worker %d] Loading images...", workerID)
if runOld {
if err := clusterMgr.LoadImage(ctx, cfg.OldImage); err != nil {
log.Printf("[Worker %d] Warning: failed to load old image: %v", workerID, err)
}
}
if runNew {
if err := clusterMgr.LoadImage(ctx, cfg.NewImage); err != nil {
log.Printf("[Worker %d] Warning: failed to load new image: %v", workerID, err)
}
}
testImage := "gcr.io/google-containers/busybox:1.27"
if err := clusterMgr.LoadImage(ctx, testImage); err != nil {
log.Printf("[Worker %d] Warning: failed to pre-load test image: %v", workerID, err)
}
}
kubeClient, err := getKubeClient(kubeContext)
if err != nil {
return nil, fmt.Errorf("creating kubernetes client: %w", err)
}
log.Printf("[Worker %d] Ready", workerID)
return &workerContext{
id: workerID,
clusterMgr: clusterMgr,
promMgr: promMgr,
kubeClient: kubeClient,
kubeContext: kubeContext,
runtime: runtime,
}, nil
}
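// runVersionOnWorker deploys one Reloader version on a worker, runs the scenario,
// collects metrics and logs, and optionally cleans up afterwards.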
func runVersionOnWorker(ctx context.Context, w *workerContext, cfg RunConfig, scenarioID, version, image string, cleanupAfter bool) {
mgr := reloader.NewManager(reloader.Config{
Version: version,
Image: image,
})
mgr.SetKubeContext(w.kubeContext)
if err := mgr.Deploy(ctx); err != nil {
log.Printf("[Worker %d] Failed to deploy %s Reloader: %v", w.id, version, err)
return
}
if err := w.promMgr.WaitForTarget(ctx, mgr.Job(), 60*time.Second); err != nil {
log.Printf("[Worker %d] Warning: %v", w.id, err)
log.Printf("[Worker %d] Proceeding anyway, but metrics may be incomplete", w.id)
}
runScenario(ctx, w.kubeClient, scenarioID, version, image, cfg.Duration, cfg.ResultsDir)
collectMetrics(ctx, w.promMgr, mgr.Job(), scenarioID, version, cfg.ResultsDir)
collectLogs(ctx, mgr, scenarioID, version, cfg.ResultsDir)
if cleanupAfter {
cleanupTestNamespaces(ctx, w.kubeContext)
mgr.Cleanup(ctx)
if err := w.promMgr.Reset(ctx); err != nil {
log.Printf("[Worker %d] Warning: failed to reset Prometheus: %v", w.id, err)
}
createTestNamespace(ctx, w.kubeContext)
}
}
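// runScenario looks up the scenario in the registry, runs it with a bounded timeout,
// and writes the expected metrics for later report generation.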
func runScenario(ctx context.Context, client kubernetes.Interface, scenarioID, version, image string, duration int, resultsDir string) {
runner, ok := scenarios.Registry[scenarioID]
if !ok {
log.Printf("Unknown scenario: %s", scenarioID)
return
}
if s6, ok := runner.(*scenarios.ControllerRestartScenario); ok {
s6.ReloaderVersion = version
}
if s11, ok := runner.(*scenarios.AnnotationStrategyScenario); ok {
s11.Image = image
}
log.Printf("Running scenario %s (%s): %s", scenarioID, version, runner.Description())
if ctx.Err() != nil {
log.Printf("WARNING: Parent context already done: %v", ctx.Err())
}
timeout := time.Duration(duration)*time.Second + 5*time.Minute
log.Printf("Creating scenario context with timeout: %v (duration=%ds)", timeout, duration)
scenarioCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
expected, err := runner.Run(scenarioCtx, client, TestNamespace, time.Duration(duration)*time.Second)
if err != nil {
log.Printf("Scenario %s failed: %v", scenarioID, err)
}
scenarios.WriteExpectedMetrics(scenarioID, resultsDir, expected)
}
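// collectMetrics waits for final processing and scraping, then saves Prometheus metrics for the given version.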
func collectMetrics(ctx context.Context, promMgr *prometheus.Manager, job, scenarioID, version, resultsDir string) {
log.Printf("Waiting 5s for Reloader to finish processing events...")
time.Sleep(5 * time.Second)
log.Printf("Waiting 8s for Prometheus to scrape final metrics...")
time.Sleep(8 * time.Second)
log.Printf("Collecting metrics for %s...", version)
outputDir := filepath.Join(resultsDir, scenarioID, version)
if err := promMgr.CollectMetrics(ctx, job, outputDir, scenarioID); err != nil {
log.Printf("Failed to collect metrics: %v", err)
}
}
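// collectLogs saves the Reloader pod logs for the given scenario and version.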
func collectLogs(ctx context.Context, mgr *reloader.Manager, scenarioID, version, resultsDir string) {
log.Printf("Collecting logs for %s...", version)
logPath := filepath.Join(resultsDir, scenarioID, version, "reloader.log")
if err := mgr.CollectLogs(ctx, logPath); err != nil {
log.Printf("Failed to collect logs: %v", err)
}
}
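// generateReport invokes the report subcommand to produce a per-scenario report and prints it.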
func generateReport(scenarioID, resultsDir string, isComparison bool) {
if isComparison {
log.Println("Generating comparison report...")
} else {
log.Println("Generating single-version report...")
}
reportPath := filepath.Join(resultsDir, scenarioID, "report.txt")
cmd := exec.Command(os.Args[0], "report",
fmt.Sprintf("--scenario=%s", scenarioID),
fmt.Sprintf("--results-dir=%s", resultsDir),
fmt.Sprintf("--output=%s", reportPath))
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
log.Printf("Warning: report generation failed: %v", err)
}
if data, err := os.ReadFile(reportPath); err == nil {
fmt.Println(string(data))
}
log.Printf("Report saved to: %s", reportPath)
}
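// getKubeClient builds a Kubernetes client from the local kubeconfig, optionally pinned to a context.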
func getKubeClient(kubeContext string) (kubernetes.Interface, error) {
kubeconfig := os.Getenv("KUBECONFIG")
if kubeconfig == "" {
home, _ := os.UserHomeDir()
kubeconfig = filepath.Join(home, ".kube", "config")
}
loadingRules := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig}
configOverrides := &clientcmd.ConfigOverrides{}
if kubeContext != "" {
configOverrides.CurrentContext = kubeContext
}
kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides)
config, err := kubeConfig.ClientConfig()
if err != nil {
return nil, err
}
return kubernetes.NewForConfig(config)
}
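// createTestNamespace idempotently creates the test namespace via kubectl dry-run and apply.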
func createTestNamespace(ctx context.Context, kubeContext string) {
args := []string{"create", "namespace", TestNamespace, "--dry-run=client", "-o", "yaml"}
if kubeContext != "" {
args = append([]string{"--context", kubeContext}, args...)
}
cmd := exec.CommandContext(ctx, "kubectl", args...)
out, err := cmd.Output()
if err != nil {
log.Printf("Warning: failed to render namespace manifest: %v", err)
return
}
applyArgs := []string{"apply", "-f", "-"}
if kubeContext != "" {
applyArgs = append([]string{"--context", kubeContext}, applyArgs...)
}
applyCmd := exec.CommandContext(ctx, "kubectl", applyArgs...)
applyCmd.Stdin = strings.NewReader(string(out))
applyCmd.Run()
}
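// cleanupTestNamespaces deletes the test namespaces and force-removes any remaining pods.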
func cleanupTestNamespaces(ctx context.Context, kubeContext string) {
log.Println("Cleaning up test resources...")
namespaces := []string{TestNamespace}
for i := range 10 {
namespaces = append(namespaces, fmt.Sprintf("%s-%d", TestNamespace, i))
}
for _, ns := range namespaces {
args := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"}
if kubeContext != "" {
args = append([]string{"--context", kubeContext}, args...)
}
exec.CommandContext(ctx, "kubectl", args...).Run()
}
time.Sleep(2 * time.Second)
for _, ns := range namespaces {
args := []string{"delete", "pods", "--all", "-n", ns, "--grace-period=0", "--force"}
if kubeContext != "" {
args = append([]string{"--context", kubeContext}, args...)
}
exec.CommandContext(ctx, "kubectl", args...).Run()
}
}


@@ -0,0 +1,251 @@
package cmd
import (
"encoding/json"
"fmt"
"log"
"os"
"sort"
"strings"
"time"
"github.com/spf13/cobra"
)
var (
summaryResultsDir string
summaryOutputFile string
summaryFormat string
summaryTestType string
)
var summaryCmd = &cobra.Command{
Use: "summary",
Short: "Generate summary across all scenarios (for CI)",
Long: `Generate an aggregated summary report across all test scenarios.
Examples:
# Generate markdown summary for CI
loadtest summary --results-dir=./results --format=markdown`,
Run: func(cmd *cobra.Command, args []string) {
summaryCommand()
},
}
func init() {
summaryCmd.Flags().StringVar(&summaryResultsDir, "results-dir", "./results", "Directory containing results")
summaryCmd.Flags().StringVar(&summaryOutputFile, "output", "", "Output file (default: stdout)")
summaryCmd.Flags().StringVar(&summaryFormat, "format", "markdown", "Output format: text, json, markdown")
summaryCmd.Flags().StringVar(&summaryTestType, "test-type", "full", "Test type label: quick, full")
}
// SummaryReport aggregates results from multiple scenarios.
type SummaryReport struct {
Timestamp time.Time `json:"timestamp"`
TestType string `json:"test_type"`
PassCount int `json:"pass_count"`
FailCount int `json:"fail_count"`
TotalCount int `json:"total_count"`
Scenarios []ScenarioSummary `json:"scenarios"`
}
// ScenarioSummary provides a brief summary of a single scenario.
type ScenarioSummary struct {
ID string `json:"id"`
Status string `json:"status"`
Description string `json:"description"`
ActionTotal float64 `json:"action_total"`
ActionExp float64 `json:"action_expected"`
ErrorsTotal float64 `json:"errors_total"`
}
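// summaryCommand generates the aggregated summary, writes it in the requested format,
// and exits non-zero when any scenario failed.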
func summaryCommand() {
summary, err := generateSummaryReport(summaryResultsDir, summaryTestType)
if err != nil {
log.Fatalf("Failed to generate summary: %v", err)
}
var output string
switch OutputFormat(summaryFormat) {
case OutputFormatJSON:
output = renderSummaryJSON(summary)
case OutputFormatText:
output = renderSummaryText(summary)
default:
output = renderSummaryMarkdown(summary)
}
if summaryOutputFile != "" {
if err := os.WriteFile(summaryOutputFile, []byte(output), 0644); err != nil {
log.Fatalf("Failed to write output file: %v", err)
}
log.Printf("Summary written to %s", summaryOutputFile)
} else {
fmt.Print(output)
}
if summary.FailCount > 0 {
os.Exit(1)
}
}
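// generateSummaryReport walks the results directory and aggregates per-scenario reports.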
func generateSummaryReport(resultsDir, testType string) (*SummaryReport, error) {
summary := &SummaryReport{
Timestamp: time.Now(),
TestType: testType,
}
entries, err := os.ReadDir(resultsDir)
if err != nil {
return nil, fmt.Errorf("failed to read results directory: %w", err)
}
for _, entry := range entries {
if !entry.IsDir() || !strings.HasPrefix(entry.Name(), "S") {
continue
}
scenarioID := entry.Name()
report, err := generateScenarioReport(scenarioID, resultsDir)
if err != nil {
log.Printf("Warning: failed to load scenario %s: %v", scenarioID, err)
continue
}
scenarioSummary := ScenarioSummary{
ID: scenarioID,
Status: report.OverallStatus,
Description: report.TestDescription,
}
for _, c := range report.Comparisons {
switch c.Name {
case "action_total":
scenarioSummary.ActionTotal = c.NewValue
scenarioSummary.ActionExp = c.Expected
case "errors_total":
scenarioSummary.ErrorsTotal = c.NewValue
}
}
summary.Scenarios = append(summary.Scenarios, scenarioSummary)
summary.TotalCount++
if report.OverallStatus == "PASS" {
summary.PassCount++
} else {
summary.FailCount++
}
}
sort.Slice(summary.Scenarios, func(i, j int) bool {
return naturalSort(summary.Scenarios[i].ID, summary.Scenarios[j].ID)
})
return summary, nil
}
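// naturalSort orders scenario IDs by their numeric suffix (so S2 sorts before S10).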
func naturalSort(a, b string) bool {
var aNum, bNum int
fmt.Sscanf(a, "S%d", &aNum)
fmt.Sscanf(b, "S%d", &bNum)
return aNum < bNum
}
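// renderSummaryJSON renders the summary as indented JSON.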
func renderSummaryJSON(summary *SummaryReport) string {
data, err := json.MarshalIndent(summary, "", " ")
if err != nil {
return fmt.Sprintf(`{"error": "%s"}`, err.Error())
}
return string(data)
}
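// renderSummaryText renders the summary as a plain-text table.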
func renderSummaryText(summary *SummaryReport) string {
var sb strings.Builder
sb.WriteString("================================================================================\n")
sb.WriteString(" LOAD TEST SUMMARY\n")
sb.WriteString("================================================================================\n\n")
passRate := 0
if summary.TotalCount > 0 {
passRate = summary.PassCount * 100 / summary.TotalCount
}
fmt.Fprintf(&sb, "Test Type: %s\n", summary.TestType)
fmt.Fprintf(&sb, "Results: %d/%d passed (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate)
fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "ID", "Status", "Description", "Actions", "Errors")
fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8s\n", "------", "--------", strings.Repeat("-", 45), "----------", "--------")
for _, s := range summary.Scenarios {
desc := s.Description
if len(desc) > 45 {
desc = desc[:42] + "..."
}
actions := fmt.Sprintf("%.0f", s.ActionTotal)
if s.ActionExp > 0 {
actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp)
}
fmt.Fprintf(&sb, "%-6s %-8s %-45s %10s %8.0f\n", s.ID, s.Status, desc, actions, s.ErrorsTotal)
}
sb.WriteString("\n================================================================================\n")
return sb.String()
}
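// renderSummaryMarkdown renders the summary as a markdown table with per-scenario status.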
func renderSummaryMarkdown(summary *SummaryReport) string {
var sb strings.Builder
emoji := "✅"
title := "ALL TESTS PASSED"
if summary.FailCount > 0 {
emoji = "❌"
title = fmt.Sprintf("%d TEST(S) FAILED", summary.FailCount)
} else if summary.TotalCount == 0 {
emoji = "⚠️"
title = "NO RESULTS"
}
sb.WriteString(fmt.Sprintf("## %s Load Test Results: %s\n\n", emoji, title))
if summary.TestType == "quick" {
sb.WriteString("> 🚀 **Quick Test** (S1, S4, S6) — Use `/loadtest` for full suite\n\n")
}
passRate := 0
if summary.TotalCount > 0 {
passRate = summary.PassCount * 100 / summary.TotalCount
}
sb.WriteString(fmt.Sprintf("**%d/%d passed** (%d%%)\n\n", summary.PassCount, summary.TotalCount, passRate))
sb.WriteString("| | Scenario | Description | Actions | Errors |\n")
sb.WriteString("|:-:|:--------:|-------------|:-------:|:------:|\n")
for _, s := range summary.Scenarios {
icon := "✅"
if s.Status != "PASS" {
icon = "❌"
}
desc := s.Description
if len(desc) > 45 {
desc = desc[:42] + "..."
}
actions := fmt.Sprintf("%.0f", s.ActionTotal)
if s.ActionExp > 0 {
actions = fmt.Sprintf("%.0f/%.0f", s.ActionTotal, s.ActionExp)
}
errors := fmt.Sprintf("%.0f", s.ErrorsTotal)
if s.ErrorsTotal > 0 {
errors = fmt.Sprintf("⚠️ %.0f", s.ErrorsTotal)
}
sb.WriteString(fmt.Sprintf("| %s | **%s** | %s | %s | %s |\n", icon, s.ID, desc, actions, errors))
}
sb.WriteString("\n📦 **[Download detailed results](../artifacts)**\n")
return sb.String()
}


@@ -0,0 +1,429 @@
// Package prometheus provides Prometheus deployment and querying functionality.
package prometheus
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Manager handles Prometheus operations.
type Manager struct {
manifestPath string
portForward *exec.Cmd
localPort int
kubeContext string
}
// NewManager creates a new Prometheus manager.
func NewManager(manifestPath string) *Manager {
return &Manager{
manifestPath: manifestPath,
localPort: 9091,
}
}
// NewManagerWithPort creates a Prometheus manager with a custom port.
func NewManagerWithPort(manifestPath string, port int, kubeContext string) *Manager {
return &Manager{
manifestPath: manifestPath,
localPort: port,
kubeContext: kubeContext,
}
}
// kubectl returns kubectl args with optional context
func (m *Manager) kubectl(args ...string) []string {
if m.kubeContext != "" {
return append([]string{"--context", m.kubeContext}, args...)
}
return args
}
// Deploy deploys Prometheus to the cluster.
func (m *Manager) Deploy(ctx context.Context) error {
cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("create", "namespace", "monitoring", "--dry-run=client", "-o", "yaml")...)
out, err := cmd.Output()
if err != nil {
return fmt.Errorf("generating namespace yaml: %w", err)
}
applyCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", "-")...)
applyCmd.Stdin = strings.NewReader(string(out))
if err := applyCmd.Run(); err != nil {
return fmt.Errorf("applying namespace: %w", err)
}
applyCmd = exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", m.manifestPath)...)
applyCmd.Stdout = os.Stdout
applyCmd.Stderr = os.Stderr
if err := applyCmd.Run(); err != nil {
return fmt.Errorf("applying prometheus manifest: %w", err)
}
fmt.Println("Waiting for Prometheus to be ready...")
waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
"-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
waitCmd.Stdout = os.Stdout
waitCmd.Stderr = os.Stderr
if err := waitCmd.Run(); err != nil {
return fmt.Errorf("waiting for prometheus: %w", err)
}
return nil
}
// StartPortForward starts port-forwarding to Prometheus.
func (m *Manager) StartPortForward(ctx context.Context) error {
m.StopPortForward()
m.portForward = exec.CommandContext(ctx, "kubectl", m.kubectl("port-forward",
"-n", "monitoring", "svc/prometheus", fmt.Sprintf("%d:9090", m.localPort))...)
if err := m.portForward.Start(); err != nil {
return fmt.Errorf("starting port-forward: %w", err)
}
for i := 0; i < 30; i++ {
time.Sleep(time.Second)
if m.isAccessible() {
fmt.Printf("Prometheus accessible at http://localhost:%d\n", m.localPort)
return nil
}
}
return fmt.Errorf("prometheus port-forward not ready after 30s")
}
// StopPortForward stops the port-forward process.
func (m *Manager) StopPortForward() {
if m.portForward != nil && m.portForward.Process != nil {
m.portForward.Process.Kill()
m.portForward = nil
}
exec.Command("pkill", "-f", fmt.Sprintf("kubectl port-forward.*prometheus.*%d", m.localPort)).Run()
}
// Reset restarts Prometheus to clear all metrics.
func (m *Manager) Reset(ctx context.Context) error {
m.StopPortForward()
cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("delete", "pod", "-n", "monitoring",
"-l", "app=prometheus", "--grace-period=0", "--force")...)
cmd.Run()
fmt.Println("Waiting for Prometheus to restart...")
waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
"-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
if err := waitCmd.Run(); err != nil {
return fmt.Errorf("waiting for prometheus restart: %w", err)
}
if err := m.StartPortForward(ctx); err != nil {
return err
}
fmt.Println("Waiting 5s for Prometheus to initialize scraping...")
time.Sleep(5 * time.Second)
return nil
}
func (m *Manager) isAccessible() bool {
conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", m.localPort), 2*time.Second)
if err != nil {
return false
}
conn.Close()
resp, err := http.Get(fmt.Sprintf("http://localhost:%d/api/v1/status/config", m.localPort))
if err != nil {
return false
}
resp.Body.Close()
return resp.StatusCode == 200
}
// URL returns the local Prometheus URL.
func (m *Manager) URL() string {
return fmt.Sprintf("http://localhost:%d", m.localPort)
}
// WaitForTarget waits for a specific job to be scraped by Prometheus.
func (m *Manager) WaitForTarget(ctx context.Context, job string, timeout time.Duration) error {
fmt.Printf("Waiting for Prometheus to discover and scrape job '%s'...\n", job)
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if m.isTargetHealthy(job) {
fmt.Printf("Prometheus is scraping job '%s'\n", job)
return nil
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(2 * time.Second):
}
}
m.printTargetStatus(job)
return fmt.Errorf("timeout waiting for Prometheus to scrape job '%s'", job)
}
// isTargetHealthy checks if a job has at least one healthy target.
func (m *Manager) isTargetHealthy(job string) bool {
resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
if err != nil {
return false
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return false
}
var result struct {
Status string `json:"status"`
Data struct {
ActiveTargets []struct {
Labels map[string]string `json:"labels"`
Health string `json:"health"`
} `json:"activeTargets"`
} `json:"data"`
}
if err := json.Unmarshal(body, &result); err != nil {
return false
}
for _, target := range result.Data.ActiveTargets {
if target.Labels["job"] == job && target.Health == "up" {
return true
}
}
return false
}
// printTargetStatus prints debug info about targets.
func (m *Manager) printTargetStatus(job string) {
resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
if err != nil {
fmt.Printf("Failed to get targets: %v\n", err)
return
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
var result struct {
Data struct {
ActiveTargets []struct {
Labels map[string]string `json:"labels"`
Health string `json:"health"`
LastError string `json:"lastError"`
ScrapeURL string `json:"scrapeUrl"`
} `json:"activeTargets"`
} `json:"data"`
}
if err := json.Unmarshal(body, &result); err != nil {
fmt.Printf("Failed to parse targets: %v\n", err)
return
}
fmt.Printf("Prometheus targets for job '%s':\n", job)
found := false
for _, target := range result.Data.ActiveTargets {
if target.Labels["job"] == job {
found = true
fmt.Printf(" - %s: health=%s, lastError=%s\n",
target.ScrapeURL, target.Health, target.LastError)
}
}
if !found {
fmt.Printf(" No targets found for job '%s'\n", job)
fmt.Printf(" Available jobs: ")
jobs := make(map[string]bool)
for _, target := range result.Data.ActiveTargets {
jobs[target.Labels["job"]] = true
}
for j := range jobs {
fmt.Printf("%s ", j)
}
fmt.Println()
}
}
// HasMetrics checks if the specified job has any metrics available.
func (m *Manager) HasMetrics(ctx context.Context, job string) bool {
query := fmt.Sprintf(`up{job="%s"}`, job)
result, err := m.Query(ctx, query)
if err != nil {
return false
}
return len(result.Data.Result) > 0 && len(result.Data.Result[0].Value) >= 2 && result.Data.Result[0].Value[1] == "1"
}
// QueryResponse represents a Prometheus query response.
type QueryResponse struct {
Status string `json:"status"`
Data struct {
ResultType string `json:"resultType"`
Result []struct {
Metric map[string]string `json:"metric"`
Value []interface{} `json:"value"`
} `json:"result"`
} `json:"data"`
}
// Query executes a PromQL query and returns the response.
func (m *Manager) Query(ctx context.Context, query string) (*QueryResponse, error) {
u := fmt.Sprintf("%s/api/v1/query?query=%s", m.URL(), url.QueryEscape(query))
req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
if err != nil {
return nil, err
}
client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("querying prometheus: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("reading response: %w", err)
}
var result QueryResponse
if err := json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("parsing response: %w", err)
}
return &result, nil
}
// CollectMetrics collects all metrics for a scenario and writes to output directory.
func (m *Manager) CollectMetrics(ctx context.Context, job, outputDir, scenario string) error {
if err := os.MkdirAll(outputDir, 0755); err != nil {
return fmt.Errorf("creating output directory: %w", err)
}
timeRange := "10m"
// For S6 (restart scenario), use increase() to handle counter resets
useIncrease := scenario == "S6"
counterMetrics := []string{
"reloader_reconcile_total",
"reloader_action_total",
"reloader_skipped_total",
"reloader_errors_total",
"reloader_events_received_total",
"reloader_workloads_scanned_total",
"reloader_workloads_matched_total",
"reloader_reload_executed_total",
}
for _, metric := range counterMetrics {
var query string
if useIncrease {
query = fmt.Sprintf(`sum(increase(%s{job="%s"}[%s])) by (success, reason)`, metric, job, timeRange)
} else {
query = fmt.Sprintf(`sum(%s{job="%s"}) by (success, reason)`, metric, job)
}
if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, metric+".json")); err != nil {
fmt.Printf("Warning: failed to collect %s: %v\n", metric, err)
}
}
histogramMetrics := []struct {
name string
prefix string
}{
{"reloader_reconcile_duration_seconds", "reconcile"},
{"reloader_action_latency_seconds", "action"},
}
for _, hm := range histogramMetrics {
for _, pct := range []int{50, 95, 99} {
quantile := float64(pct) / 100
query := fmt.Sprintf(`histogram_quantile(%v, sum(rate(%s_bucket{job="%s"}[%s])) by (le))`,
quantile, hm.name, job, timeRange)
outFile := filepath.Join(outputDir, fmt.Sprintf("%s_p%d.json", hm.prefix, pct))
if err := m.queryAndSave(ctx, query, outFile); err != nil {
fmt.Printf("Warning: failed to collect %s p%d: %v\n", hm.name, pct, err)
}
}
}
restQueries := map[string]string{
"rest_client_requests_total.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s"})`, job),
"rest_client_requests_get.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="GET"})`, job),
"rest_client_requests_patch.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PATCH"})`, job),
"rest_client_requests_put.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PUT"})`, job),
"rest_client_requests_errors.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",code=~"[45].."}) or vector(0)`, job),
}
for filename, query := range restQueries {
if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
}
}
resourceQueries := map[string]string{
"memory_rss_bytes_avg.json": fmt.Sprintf(`avg_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
"memory_rss_bytes_max.json": fmt.Sprintf(`max_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
"memory_rss_bytes_cur.json": fmt.Sprintf(`process_resident_memory_bytes{job="%s"}`, job),
"memory_heap_bytes_avg.json": fmt.Sprintf(`avg_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),
"memory_heap_bytes_max.json": fmt.Sprintf(`max_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),
"cpu_usage_cores_avg.json": fmt.Sprintf(`rate(process_cpu_seconds_total{job="%s"}[%s])`, job, timeRange),
"cpu_usage_cores_max.json": fmt.Sprintf(`max_over_time(rate(process_cpu_seconds_total{job="%s"}[1m])[%s:1m])`, job, timeRange),
"goroutines_avg.json": fmt.Sprintf(`avg_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
"goroutines_max.json": fmt.Sprintf(`max_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
"goroutines_cur.json": fmt.Sprintf(`go_goroutines{job="%s"}`, job),
"gc_duration_seconds_p99.json": fmt.Sprintf(`histogram_quantile(0.99, sum(rate(go_gc_duration_seconds_bucket{job="%s"}[%s])) by (le))`, job, timeRange),
"threads_cur.json": fmt.Sprintf(`go_threads{job="%s"}`, job),
}
for filename, query := range resourceQueries {
if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
}
}
return nil
}
func (m *Manager) queryAndSave(ctx context.Context, query, outputPath string) error {
result, err := m.Query(ctx, query)
if err != nil {
emptyResult := `{"status":"success","data":{"resultType":"vector","result":[]}}`
return os.WriteFile(outputPath, []byte(emptyResult), 0644)
}
data, err := json.MarshalIndent(result, "", " ")
if err != nil {
return err
}
return os.WriteFile(outputPath, data, 0644)
}


@@ -0,0 +1,271 @@
package reloader
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Config holds configuration for a Reloader deployment.
type Config struct {
Version string
Image string
Namespace string
ReloadStrategy string
}
// Manager handles Reloader deployment operations.
type Manager struct {
config Config
kubeContext string
}
// NewManager creates a new Reloader manager.
func NewManager(config Config) *Manager {
return &Manager{
config: config,
}
}
// SetKubeContext sets the kubeconfig context to use.
func (m *Manager) SetKubeContext(kubeContext string) {
m.kubeContext = kubeContext
}
// kubectl returns kubectl command with optional context.
func (m *Manager) kubectl(ctx context.Context, args ...string) *exec.Cmd {
if m.kubeContext != "" {
args = append([]string{"--context", m.kubeContext}, args...)
}
return exec.CommandContext(ctx, "kubectl", args...)
}
// namespace returns the namespace for this reloader instance.
func (m *Manager) namespace() string {
if m.config.Namespace != "" {
return m.config.Namespace
}
return fmt.Sprintf("reloader-%s", m.config.Version)
}
// releaseName returns the release name for this instance.
func (m *Manager) releaseName() string {
return fmt.Sprintf("reloader-%s", m.config.Version)
}
// Job returns the Prometheus job name for this Reloader instance.
func (m *Manager) Job() string {
return fmt.Sprintf("reloader-%s", m.config.Version)
}
// Deploy deploys Reloader to the cluster using raw manifests.
func (m *Manager) Deploy(ctx context.Context) error {
ns := m.namespace()
name := m.releaseName()
fmt.Printf("Deploying Reloader (%s) with image %s...\n", m.config.Version, m.config.Image)
manifest := m.buildManifest(ns, name)
applyCmd := m.kubectl(ctx, "apply", "-f", "-")
applyCmd.Stdin = strings.NewReader(manifest)
applyCmd.Stdout = os.Stdout
applyCmd.Stderr = os.Stderr
if err := applyCmd.Run(); err != nil {
return fmt.Errorf("applying manifest: %w", err)
}
fmt.Printf("Waiting for Reloader deployment to be ready...\n")
waitCmd := m.kubectl(ctx, "rollout", "status", "deployment", name,
"-n", ns,
"--timeout=120s")
waitCmd.Stdout = os.Stdout
waitCmd.Stderr = os.Stderr
if err := waitCmd.Run(); err != nil {
return fmt.Errorf("waiting for deployment: %w", err)
}
time.Sleep(2 * time.Second)
fmt.Printf("Reloader (%s) deployed successfully\n", m.config.Version)
return nil
}
// buildManifest creates the raw Kubernetes manifest for Reloader.
func (m *Manager) buildManifest(ns, name string) string {
var args []string
args = append(args, "--log-format=json")
if m.config.ReloadStrategy != "" && m.config.ReloadStrategy != "default" {
args = append(args, fmt.Sprintf("--reload-strategy=%s", m.config.ReloadStrategy))
}
argsYAML := ""
if len(args) > 0 {
argsYAML = " args:\n"
for _, arg := range args {
argsYAML += fmt.Sprintf(" - %q\n", arg)
}
}
return fmt.Sprintf(`---
apiVersion: v1
kind: Namespace
metadata:
name: %[1]s
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: %[2]s
namespace: %[1]s
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: %[2]s
rules:
- apiGroups: ["*"]
resources: ["*"]
verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: %[2]s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: %[2]s
subjects:
- kind: ServiceAccount
name: %[2]s
namespace: %[1]s
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: %[2]s
namespace: %[1]s
labels:
app: %[2]s
app.kubernetes.io/name: reloader
loadtest-version: %[3]s
spec:
replicas: 1
selector:
matchLabels:
app: %[2]s
template:
metadata:
labels:
app: %[2]s
app.kubernetes.io/name: reloader
loadtest-version: %[3]s
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9090"
prometheus.io/path: "/metrics"
spec:
serviceAccountName: %[2]s
securityContext:
runAsNonRoot: true
runAsUser: 65534
containers:
- name: reloader
image: %[4]s
imagePullPolicy: IfNotPresent
ports:
- name: http
containerPort: 9090
%[5]s resources:
requests:
cpu: 10m
memory: 64Mi
limits:
cpu: 500m
memory: 256Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
`, ns, name, m.config.Version, m.config.Image, argsYAML)
}
// Cleanup removes all Reloader resources from the cluster.
func (m *Manager) Cleanup(ctx context.Context) error {
	ns := m.namespace()
	name := m.releaseName()
	delDeploy := m.kubectl(ctx, "delete", "deployment", name, "-n", ns, "--ignore-not-found")
	delDeploy.Run()
	delCRB := m.kubectl(ctx, "delete", "clusterrolebinding", name, "--ignore-not-found")
	delCRB.Run()
	delCR := m.kubectl(ctx, "delete", "clusterrole", name, "--ignore-not-found")
	delCR.Run()
	delNS := m.kubectl(ctx, "delete", "namespace", ns, "--wait=false", "--ignore-not-found")
	if err := delNS.Run(); err != nil {
		return fmt.Errorf("deleting namespace: %w", err)
	}
	return nil
}
// CleanupByVersion removes Reloader resources for a specific version without needing a Manager instance.
// This is useful for cleaning up from previous runs before creating a new Manager.
func CleanupByVersion(ctx context.Context, version, kubeContext string) {
	ns := fmt.Sprintf("reloader-%s", version)
	name := fmt.Sprintf("reloader-%s", version)
	nsArgs := []string{"delete", "namespace", ns, "--wait=false", "--ignore-not-found"}
	crArgs := []string{"delete", "clusterrole", name, "--ignore-not-found"}
	crbArgs := []string{"delete", "clusterrolebinding", name, "--ignore-not-found"}
	if kubeContext != "" {
		nsArgs = append([]string{"--context", kubeContext}, nsArgs...)
		crArgs = append([]string{"--context", kubeContext}, crArgs...)
		crbArgs = append([]string{"--context", kubeContext}, crbArgs...)
	}
	exec.CommandContext(ctx, "kubectl", nsArgs...).Run()
	exec.CommandContext(ctx, "kubectl", crArgs...).Run()
	exec.CommandContext(ctx, "kubectl", crbArgs...).Run()
}
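
// Illustrative only (not part of the original file): a minimal sketch of how
// CleanupByVersion might be used to clear leftovers from a previous run before
// new Managers are created. The version strings "old" and "new" and the helper
// name cleanupPreviousRuns are assumptions for illustration.
func cleanupPreviousRuns(ctx context.Context, kubeContext string) {
	for _, v := range []string{"old", "new"} {
		// Best-effort: CleanupByVersion ignores kubectl errors by design,
		// and an empty kubeContext falls back to the current kubectl context.
		CleanupByVersion(ctx, v, kubeContext)
	}
}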
// CollectLogs collects logs from the Reloader pod and writes them to the specified file.
func (m *Manager) CollectLogs(ctx context.Context, logPath string) error {
	ns := m.namespace()
	name := m.releaseName()
	if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
		return fmt.Errorf("creating log directory: %w", err)
	}
	cmd := m.kubectl(ctx, "logs",
		"-n", ns,
		"-l", fmt.Sprintf("app=%s", name),
		"--tail=-1")
	out, err := cmd.Output()
	if err != nil {
		cmd = m.kubectl(ctx, "logs",
			"-n", ns,
			"-l", "app.kubernetes.io/name=reloader",
			"--tail=-1")
		out, err = cmd.Output()
		if err != nil {
			return fmt.Errorf("collecting logs: %w", err)
		}
	}
	if err := os.WriteFile(logPath, out, 0644); err != nil {
		return fmt.Errorf("writing logs: %w", err)
	}
	return nil
}
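
For orientation, a hedged sketch of how one Manager instance is driven end to end during a load test. The Config type name and the NewManager constructor are assumptions made here for illustration; only Deploy, CollectLogs, and Cleanup (and the field names Version and Image referenced by the methods above) come from the file itself.

	// runLifecycle is an illustrative sketch, not part of the original file.
	func runLifecycle(ctx context.Context) error {
		// Assumed constructor and config type name.
		m := NewManager(Config{
			Version: "new",
			Image:   "stakater/reloader:latest",
		}, "" /* kubeContext: empty uses the current kubectl context */)
		if err := m.Deploy(ctx); err != nil {
			return err
		}
		defer m.Cleanup(ctx)
		// ... run the load-test scenarios against this instance here ...
		return m.CollectLogs(ctx, "results/reloader-new.log")
	}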

@@ -0,0 +1,181 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 2s
      evaluation_interval: 2s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']
    - job_name: 'reloader-old'
      kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
          - reloader-old
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name
    - job_name: 'reloader-new'
      kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
          - reloader-new
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_pod_name]
        action: replace
        target_label: kubernetes_pod_name
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      containers:
      - name: prometheus
        image: quay.io/prometheus/prometheus:v2.47.0
        args:
        - --config.file=/etc/prometheus/prometheus.yml
        - --storage.tsdb.path=/prometheus
        - --web.console.libraries=/usr/share/prometheus/console_libraries
        - --web.console.templates=/usr/share/prometheus/consoles
        - --web.enable-lifecycle
        ports:
        - containerPort: 9090
        volumeMounts:
        - name: config
          mountPath: /etc/prometheus
        - name: data
          mountPath: /prometheus
        resources:
          limits:
            cpu: 1000m
            memory: 1Gi
          requests:
            cpu: 200m
            memory: 512Mi
        readinessProbe:
          httpGet:
            path: /-/ready
            port: 9090
          initialDelaySeconds: 5
          periodSeconds: 5
        livenessProbe:
          httpGet:
            path: /-/healthy
            port: 9090
          initialDelaySeconds: 10
          periodSeconds: 10
      volumes:
      - name: config
        configMap:
          name: prometheus-config
      - name: data
        emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
spec:
  selector:
    app: prometheus
  ports:
  - port: 9090
    targetPort: 9090
  type: NodePort
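
The two scrape jobs keep metrics from the old and new Reloader deployments queryable side by side under the job labels reloader-old and reloader-new. One quick way to confirm both are being scraped is to port-forward the Service above (kubectl port-forward -n monitoring svc/prometheus 9090:9090) and hit the Prometheus HTTP API. The small Go sketch below is illustrative only and assumes that port-forward is running on localhost:9090.

	package main

	import (
		"fmt"
		"io"
		"net/http"
		"net/url"
	)

	// Queries up{job="..."} for both Reloader scrape jobs via the Prometheus HTTP API.
	func main() {
		for _, job := range []string{"reloader-old", "reloader-new"} {
			query := url.QueryEscape(fmt.Sprintf(`up{job=%q}`, job))
			resp, err := http.Get("http://localhost:9090/api/v1/query?query=" + query)
			if err != nil {
				fmt.Printf("%s: query failed: %v\n", job, err)
				continue
			}
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			fmt.Printf("%s: %s\n", job, body)
		}
	}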