feat: Load tests
test/loadtest/internal/cluster/kind.go (new file, 313 lines)
@@ -0,0 +1,313 @@
// Package cluster provides kind cluster management functionality.
package cluster

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"
)

// Config holds configuration for kind cluster operations.
type Config struct {
	Name             string
	ContainerRuntime string // "docker" or "podman"
	PortOffset       int    // Offset for host port mappings (for parallel clusters)
}

// Manager handles kind cluster operations.
type Manager struct {
	cfg Config
}

// NewManager creates a new cluster manager.
func NewManager(cfg Config) *Manager {
	return &Manager{cfg: cfg}
}

// DetectContainerRuntime finds available container runtime.
func DetectContainerRuntime() (string, error) {
	if _, err := exec.LookPath("podman"); err == nil {
		return "podman", nil
	}
	if _, err := exec.LookPath("docker"); err == nil {
		return "docker", nil
	}
	return "", fmt.Errorf("neither docker nor podman found in PATH")
}

// Exists checks if the cluster already exists.
func (m *Manager) Exists() bool {
	cmd := exec.Command("kind", "get", "clusters")
	out, err := cmd.Output()
	if err != nil {
		return false
	}
	for _, line := range strings.Split(string(out), "\n") {
		if strings.TrimSpace(line) == m.cfg.Name {
			return true
		}
	}
	return false
}

// Delete deletes the kind cluster.
func (m *Manager) Delete(ctx context.Context) error {
	cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", m.cfg.Name)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

// Create creates a new kind cluster with optimized settings.
func (m *Manager) Create(ctx context.Context) error {
	if m.cfg.ContainerRuntime == "podman" {
		os.Setenv("KIND_EXPERIMENTAL_PROVIDER", "podman")
	}

	if m.Exists() {
		fmt.Printf("Cluster %s already exists, deleting...\n", m.cfg.Name)
		if err := m.Delete(ctx); err != nil {
			return fmt.Errorf("deleting existing cluster: %w", err)
		}
	}

	// Calculate unique ports based on offset (for parallel clusters)
	httpPort := 8080 + m.cfg.PortOffset
	httpsPort := 8443 + m.cfg.PortOffset

	config := fmt.Sprintf(`kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/16"
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: InitConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        node-labels: "ingress-ready=true"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
  - |
    kind: ClusterConfiguration
    apiServer:
      extraArgs:
        max-requests-inflight: "800"
        max-mutating-requests-inflight: "400"
        watch-cache-sizes: "configmaps#1000,secrets#1000,pods#1000"
    controllerManager:
      extraArgs:
        kube-api-qps: "200"
        kube-api-burst: "200"
    scheduler:
      extraArgs:
        kube-api-qps: "200"
        kube-api-burst: "200"
  extraPortMappings:
  - containerPort: 80
    hostPort: %d
    protocol: TCP
  - containerPort: 443
    hostPort: %d
    protocol: TCP
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
- role: worker
  kubeadmConfigPatches:
  - |
    kind: JoinConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        max-pods: "250"
        kube-api-qps: "50"
        kube-api-burst: "100"
        serialize-image-pulls: "false"
        event-qps: "50"
        event-burst: "100"
`, httpPort, httpsPort)
	cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", m.cfg.Name, "--config=-")
	cmd.Stdin = strings.NewReader(config)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

// GetKubeconfig returns the kubeconfig for the cluster.
func (m *Manager) GetKubeconfig() (string, error) {
	cmd := exec.Command("kind", "get", "kubeconfig", "--name", m.cfg.Name)
	out, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("getting kubeconfig: %w", err)
	}
	return string(out), nil
}

// Context returns the kubectl context name for this cluster.
func (m *Manager) Context() string {
	return "kind-" + m.cfg.Name
}

// Name returns the cluster name.
func (m *Manager) Name() string {
	return m.cfg.Name
}

// LoadImage loads a container image into the kind cluster.
func (m *Manager) LoadImage(ctx context.Context, image string) error {
	// First check if image exists locally
	if !m.imageExistsLocally(image) {
		fmt.Printf("  Image not found locally, pulling: %s\n", image)
		pullCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
		pullCmd.Stdout = os.Stdout
		pullCmd.Stderr = os.Stderr
		if err := pullCmd.Run(); err != nil {
			return fmt.Errorf("pulling image %s: %w", image, err)
		}
	} else {
		fmt.Printf("  Image found locally: %s\n", image)
	}

	fmt.Printf("  Copying image to kind cluster...\n")

	if m.cfg.ContainerRuntime == "podman" {
		// For podman, save to archive and load
		tmpFile := fmt.Sprintf("/tmp/kind-image-%d.tar", time.Now().UnixNano())
		defer os.Remove(tmpFile)

		saveCmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "save", image, "-o", tmpFile)
		if err := saveCmd.Run(); err != nil {
			return fmt.Errorf("saving image %s: %w", image, err)
		}

		loadCmd := exec.CommandContext(ctx, "kind", "load", "image-archive", tmpFile, "--name", m.cfg.Name)
		loadCmd.Stdout = os.Stdout
		loadCmd.Stderr = os.Stderr
		if err := loadCmd.Run(); err != nil {
			return fmt.Errorf("loading image archive: %w", err)
		}
	} else {
		loadCmd := exec.CommandContext(ctx, "kind", "load", "docker-image", image, "--name", m.cfg.Name)
		loadCmd.Stdout = os.Stdout
		loadCmd.Stderr = os.Stderr
		if err := loadCmd.Run(); err != nil {
			return fmt.Errorf("loading image %s: %w", image, err)
		}
	}

	return nil
}

// imageExistsLocally checks if an image exists in the local container runtime.
func (m *Manager) imageExistsLocally(image string) bool {
	// Try "image exists" command (works for podman)
	cmd := exec.Command(m.cfg.ContainerRuntime, "image", "exists", image)
	if err := cmd.Run(); err == nil {
		return true
	}

	// Try "image inspect" (works for both docker and podman)
	cmd = exec.Command(m.cfg.ContainerRuntime, "image", "inspect", image)
	if err := cmd.Run(); err == nil {
		return true
	}

	// Try listing images and grep
	cmd = exec.Command(m.cfg.ContainerRuntime, "images", "--format", "{{.Repository}}:{{.Tag}}")
	out, err := cmd.Output()
	if err == nil {
		for _, line := range strings.Split(string(out), "\n") {
			if strings.TrimSpace(line) == image {
				return true
			}
		}
	}

	return false
}

// PullImage pulls an image using the container runtime.
func (m *Manager) PullImage(ctx context.Context, image string) error {
	cmd := exec.CommandContext(ctx, m.cfg.ContainerRuntime, "pull", image)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

// ExecKubectl runs a kubectl command against the cluster.
func (m *Manager) ExecKubectl(ctx context.Context, args ...string) ([]byte, error) {
	cmd := exec.CommandContext(ctx, "kubectl", args...)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("%w: %s", err, stderr.String())
	}
	return stdout.Bytes(), nil
}
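Usage note (not part of the diff): a minimal sketch of how this package's Manager could be driven from a load-test harness. The import path, cluster name, and image tag below are assumptions, not taken from this commit.

package main

import (
	"context"
	"log"

	// Assumed import path; adjust to the module path the load-test harness actually uses.
	"github.com/stakater/Reloader/test/loadtest/internal/cluster"
)

func main() {
	// Pick whichever runtime is installed (DetectContainerRuntime prefers podman).
	runtime, err := cluster.DetectContainerRuntime()
	if err != nil {
		log.Fatal(err)
	}

	mgr := cluster.NewManager(cluster.Config{
		Name:             "reloader-loadtest", // hypothetical cluster name
		ContainerRuntime: runtime,
		PortOffset:       0, // bump per parallel cluster to avoid host-port clashes
	})

	ctx := context.Background()
	if err := mgr.Create(ctx); err != nil {
		log.Fatal(err)
	}
	defer mgr.Delete(ctx)

	// Load the image under test into the kind nodes (placeholder tag).
	if err := mgr.LoadImage(ctx, "stakater/reloader:latest"); err != nil {
		log.Fatal(err)
	}

	// Run kubectl against this cluster's context.
	out, err := mgr.ExecKubectl(ctx, "--context", mgr.Context(), "get", "nodes")
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("nodes:\n%s", out)
}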
test/loadtest/internal/prometheus/prometheus.go (new file, 452 lines)
@@ -0,0 +1,452 @@
// Package prometheus provides Prometheus deployment and querying functionality.
package prometheus

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"
)

// Manager handles Prometheus operations.
type Manager struct {
	manifestPath string
	portForward  *exec.Cmd
	localPort    int
	kubeContext  string // Optional: use specific kubeconfig context
}

// NewManager creates a new Prometheus manager.
func NewManager(manifestPath string) *Manager {
	return &Manager{
		manifestPath: manifestPath,
		localPort:    9091, // Use 9091 to avoid conflicts
	}
}

// NewManagerWithPort creates a Prometheus manager with a custom port.
func NewManagerWithPort(manifestPath string, port int, kubeContext string) *Manager {
	return &Manager{
		manifestPath: manifestPath,
		localPort:    port,
		kubeContext:  kubeContext,
	}
}

// kubectl returns kubectl args with optional context
func (m *Manager) kubectl(args ...string) []string {
	if m.kubeContext != "" {
		return append([]string{"--context", m.kubeContext}, args...)
	}
	return args
}

// Deploy deploys Prometheus to the cluster.
func (m *Manager) Deploy(ctx context.Context) error {
	// Create namespace
	cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("create", "namespace", "monitoring", "--dry-run=client", "-o", "yaml")...)
	out, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("generating namespace yaml: %w", err)
	}

	applyCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", "-")...)
	applyCmd.Stdin = strings.NewReader(string(out))
	if err := applyCmd.Run(); err != nil {
		return fmt.Errorf("applying namespace: %w", err)
	}

	// Apply Prometheus manifest
	applyCmd = exec.CommandContext(ctx, "kubectl", m.kubectl("apply", "-f", m.manifestPath)...)
	applyCmd.Stdout = os.Stdout
	applyCmd.Stderr = os.Stderr
	if err := applyCmd.Run(); err != nil {
		return fmt.Errorf("applying prometheus manifest: %w", err)
	}

	// Wait for Prometheus to be ready
	fmt.Println("Waiting for Prometheus to be ready...")
	waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
		"-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
	waitCmd.Stdout = os.Stdout
	waitCmd.Stderr = os.Stderr
	if err := waitCmd.Run(); err != nil {
		return fmt.Errorf("waiting for prometheus: %w", err)
	}

	return nil
}

// StartPortForward starts port-forwarding to Prometheus.
func (m *Manager) StartPortForward(ctx context.Context) error {
	m.StopPortForward()

	// Start port-forward
	m.portForward = exec.CommandContext(ctx, "kubectl", m.kubectl("port-forward",
		"-n", "monitoring", "svc/prometheus", fmt.Sprintf("%d:9090", m.localPort))...)

	if err := m.portForward.Start(); err != nil {
		return fmt.Errorf("starting port-forward: %w", err)
	}

	// Wait for port-forward to be ready
	for i := 0; i < 30; i++ {
		time.Sleep(time.Second)
		if m.isAccessible() {
			fmt.Printf("Prometheus accessible at http://localhost:%d\n", m.localPort)
			return nil
		}
	}

	return fmt.Errorf("prometheus port-forward not ready after 30s")
}

// StopPortForward stops the port-forward process.
func (m *Manager) StopPortForward() {
	if m.portForward != nil && m.portForward.Process != nil {
		m.portForward.Process.Kill()
		m.portForward = nil
	}
	// Also kill any lingering port-forwards
	exec.Command("pkill", "-f", fmt.Sprintf("kubectl port-forward.*prometheus.*%d", m.localPort)).Run()
}

// Reset restarts Prometheus to clear all metrics.
func (m *Manager) Reset(ctx context.Context) error {
	m.StopPortForward()

	// Delete Prometheus pod to reset metrics
	cmd := exec.CommandContext(ctx, "kubectl", m.kubectl("delete", "pod", "-n", "monitoring",
		"-l", "app=prometheus", "--grace-period=0", "--force")...)
	cmd.Run() // Ignore errors

	// Wait for new pod
	fmt.Println("Waiting for Prometheus to restart...")
	waitCmd := exec.CommandContext(ctx, "kubectl", m.kubectl("wait", "--for=condition=ready", "pod",
		"-l", "app=prometheus", "-n", "monitoring", "--timeout=120s")...)
	if err := waitCmd.Run(); err != nil {
		return fmt.Errorf("waiting for prometheus restart: %w", err)
	}

	// Restart port-forward
	if err := m.StartPortForward(ctx); err != nil {
		return err
	}

	// Wait for scraping to initialize
	fmt.Println("Waiting 5s for Prometheus to initialize scraping...")
	time.Sleep(5 * time.Second)

	return nil
}

func (m *Manager) isAccessible() bool {
	conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", m.localPort), 2*time.Second)
	if err != nil {
		return false
	}
	conn.Close()

	// Also try HTTP
	resp, err := http.Get(fmt.Sprintf("http://localhost:%d/api/v1/status/config", m.localPort))
	if err != nil {
		return false
	}
	resp.Body.Close()
	return resp.StatusCode == 200
}

// URL returns the local Prometheus URL.
func (m *Manager) URL() string {
	return fmt.Sprintf("http://localhost:%d", m.localPort)
}

// WaitForTarget waits for a specific job to be scraped by Prometheus.
func (m *Manager) WaitForTarget(ctx context.Context, job string, timeout time.Duration) error {
	fmt.Printf("Waiting for Prometheus to discover and scrape job '%s'...\n", job)

	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if m.isTargetHealthy(job) {
			fmt.Printf("Prometheus is scraping job '%s'\n", job)
			return nil
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(2 * time.Second):
		}
	}

	// Print debug info on timeout
	m.printTargetStatus(job)
	return fmt.Errorf("timeout waiting for Prometheus to scrape job '%s'", job)
}

// isTargetHealthy checks if a job has at least one healthy target.
func (m *Manager) isTargetHealthy(job string) bool {
	resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return false
	}

	var result struct {
		Status string `json:"status"`
		Data   struct {
			ActiveTargets []struct {
				Labels map[string]string `json:"labels"`
				Health string            `json:"health"`
			} `json:"activeTargets"`
		} `json:"data"`
	}

	if err := json.Unmarshal(body, &result); err != nil {
		return false
	}

	for _, target := range result.Data.ActiveTargets {
		if target.Labels["job"] == job && target.Health == "up" {
			return true
		}
	}
	return false
}

// printTargetStatus prints debug info about targets.
func (m *Manager) printTargetStatus(job string) {
	resp, err := http.Get(fmt.Sprintf("%s/api/v1/targets", m.URL()))
	if err != nil {
		fmt.Printf("Failed to get targets: %v\n", err)
		return
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)

	var result struct {
		Data struct {
			ActiveTargets []struct {
				Labels    map[string]string `json:"labels"`
				Health    string            `json:"health"`
				LastError string            `json:"lastError"`
				ScrapeURL string            `json:"scrapeUrl"`
			} `json:"activeTargets"`
		} `json:"data"`
	}

	if err := json.Unmarshal(body, &result); err != nil {
		fmt.Printf("Failed to parse targets: %v\n", err)
		return
	}

	fmt.Printf("Prometheus targets for job '%s':\n", job)
	found := false
	for _, target := range result.Data.ActiveTargets {
		if target.Labels["job"] == job {
			found = true
			fmt.Printf("  - %s: health=%s, lastError=%s\n",
				target.ScrapeURL, target.Health, target.LastError)
		}
	}
	if !found {
		fmt.Printf("  No targets found for job '%s'\n", job)
		fmt.Printf("  Available jobs: ")
		jobs := make(map[string]bool)
		for _, target := range result.Data.ActiveTargets {
			jobs[target.Labels["job"]] = true
		}
		for j := range jobs {
			fmt.Printf("%s ", j)
		}
		fmt.Println()
	}
}

// HasMetrics checks if the specified job has any metrics available.
func (m *Manager) HasMetrics(ctx context.Context, job string) bool {
	query := fmt.Sprintf(`up{job="%s"}`, job)
	result, err := m.Query(ctx, query)
	if err != nil {
		return false
	}
	return len(result.Data.Result) > 0 && result.Data.Result[0].Value[1] == "1"
}

// QueryResponse represents a Prometheus query response.
type QueryResponse struct {
	Status string `json:"status"`
	Data   struct {
		ResultType string `json:"resultType"`
		Result     []struct {
			Metric map[string]string `json:"metric"`
			Value  []interface{}     `json:"value"`
		} `json:"result"`
	} `json:"data"`
}

// Query executes a PromQL query and returns the response.
func (m *Manager) Query(ctx context.Context, query string) (*QueryResponse, error) {
	u := fmt.Sprintf("%s/api/v1/query?query=%s", m.URL(), url.QueryEscape(query))

	req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
	if err != nil {
		return nil, err
	}

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("querying prometheus: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response: %w", err)
	}

	var result QueryResponse
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("parsing response: %w", err)
	}

	return &result, nil
}

// CollectMetrics collects all metrics for a scenario and writes to output directory.
func (m *Manager) CollectMetrics(ctx context.Context, job, outputDir, scenario string) error {
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		return fmt.Errorf("creating output directory: %w", err)
	}

	timeRange := "10m"

	// For S6 (restart scenario), use increase() to handle counter resets
	useIncrease := scenario == "S6"

	// Counter metrics
	counterMetrics := []string{
		"reloader_reconcile_total",
		"reloader_action_total",
		"reloader_skipped_total",
		"reloader_errors_total",
		"reloader_events_received_total",
		"reloader_workloads_scanned_total",
		"reloader_workloads_matched_total",
		"reloader_reload_executed_total",
	}

	for _, metric := range counterMetrics {
		var query string
		if useIncrease {
			query = fmt.Sprintf(`sum(increase(%s{job="%s"}[%s])) by (success, reason)`, metric, job, timeRange)
		} else {
			query = fmt.Sprintf(`sum(%s{job="%s"}) by (success, reason)`, metric, job)
		}

		if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, metric+".json")); err != nil {
			fmt.Printf("Warning: failed to collect %s: %v\n", metric, err)
		}
	}

	// Histogram percentiles
	histogramMetrics := []struct {
		name   string
		prefix string
	}{
		{"reloader_reconcile_duration_seconds", "reconcile"},
		{"reloader_action_latency_seconds", "action"},
	}

	for _, hm := range histogramMetrics {
		for _, pct := range []int{50, 95, 99} {
			quantile := float64(pct) / 100
			query := fmt.Sprintf(`histogram_quantile(%v, sum(rate(%s_bucket{job="%s"}[%s])) by (le))`,
				quantile, hm.name, job, timeRange)
			outFile := filepath.Join(outputDir, fmt.Sprintf("%s_p%d.json", hm.prefix, pct))
			if err := m.queryAndSave(ctx, query, outFile); err != nil {
				fmt.Printf("Warning: failed to collect %s p%d: %v\n", hm.name, pct, err)
			}
		}
	}

	// REST client metrics
	restQueries := map[string]string{
		"rest_client_requests_total.json":  fmt.Sprintf(`sum(rest_client_requests_total{job="%s"})`, job),
		"rest_client_requests_get.json":    fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="GET"})`, job),
		"rest_client_requests_patch.json":  fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PATCH"})`, job),
		"rest_client_requests_put.json":    fmt.Sprintf(`sum(rest_client_requests_total{job="%s",method="PUT"})`, job),
		"rest_client_requests_errors.json": fmt.Sprintf(`sum(rest_client_requests_total{job="%s",code=~"[45].."}) or vector(0)`, job),
	}

	for filename, query := range restQueries {
		if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
			fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
		}
	}

	// Resource consumption metrics (memory, CPU, goroutines)
	resourceQueries := map[string]string{
		// Memory metrics (in bytes)
		"memory_rss_bytes_avg.json": fmt.Sprintf(`avg_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
		"memory_rss_bytes_max.json": fmt.Sprintf(`max_over_time(process_resident_memory_bytes{job="%s"}[%s])`, job, timeRange),
		"memory_rss_bytes_cur.json": fmt.Sprintf(`process_resident_memory_bytes{job="%s"}`, job),

		// Heap memory (Go runtime)
		"memory_heap_bytes_avg.json": fmt.Sprintf(`avg_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),
		"memory_heap_bytes_max.json": fmt.Sprintf(`max_over_time(go_memstats_heap_alloc_bytes{job="%s"}[%s])`, job, timeRange),

		// CPU metrics (rate of CPU seconds used)
		"cpu_usage_cores_avg.json": fmt.Sprintf(`rate(process_cpu_seconds_total{job="%s"}[%s])`, job, timeRange),
		"cpu_usage_cores_max.json": fmt.Sprintf(`max_over_time(rate(process_cpu_seconds_total{job="%s"}[1m])[%s:1m])`, job, timeRange),

		// Goroutines (concurrency indicator)
		"goroutines_avg.json": fmt.Sprintf(`avg_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
		"goroutines_max.json": fmt.Sprintf(`max_over_time(go_goroutines{job="%s"}[%s])`, job, timeRange),
		"goroutines_cur.json": fmt.Sprintf(`go_goroutines{job="%s"}`, job),

		// GC metrics
		"gc_duration_seconds_p99.json": fmt.Sprintf(`histogram_quantile(0.99, sum(rate(go_gc_duration_seconds_bucket{job="%s"}[%s])) by (le))`, job, timeRange),

		// Threads
		"threads_cur.json": fmt.Sprintf(`go_threads{job="%s"}`, job),
	}

	for filename, query := range resourceQueries {
		if err := m.queryAndSave(ctx, query, filepath.Join(outputDir, filename)); err != nil {
			fmt.Printf("Warning: failed to collect %s: %v\n", filename, err)
		}
	}

	return nil
}

func (m *Manager) queryAndSave(ctx context.Context, query, outputPath string) error {
	result, err := m.Query(ctx, query)
	if err != nil {
		// Write empty result on error
		emptyResult := `{"status":"success","data":{"resultType":"vector","result":[]}}`
		return os.WriteFile(outputPath, []byte(emptyResult), 0644)
	}

	data, err := json.MarshalIndent(result, "", "  ")
	if err != nil {
		return err
	}

	return os.WriteFile(outputPath, data, 0644)
}
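Usage note (not part of the diff): a minimal sketch of how the prometheus Manager could be wired around a scenario run. The import path, manifest location, kube context, job name, scenario label, and output directory are assumptions, not taken from this commit.

package main

import (
	"context"
	"log"
	"time"

	// Assumed import path; adjust to the module path the load-test harness actually uses.
	"github.com/stakater/Reloader/test/loadtest/internal/prometheus"
)

func main() {
	ctx := context.Background()

	// Manifest path, local port, and kube context are placeholders.
	prom := prometheus.NewManagerWithPort("manifests/prometheus.yaml", 9091, "kind-reloader-loadtest")

	if err := prom.Deploy(ctx); err != nil {
		log.Fatal(err)
	}
	if err := prom.StartPortForward(ctx); err != nil {
		log.Fatal(err)
	}
	defer prom.StopPortForward()

	// Wait until the (assumed) "reloader" scrape job is healthy before measuring.
	if err := prom.WaitForTarget(ctx, "reloader", 2*time.Minute); err != nil {
		log.Fatal(err)
	}

	// ... run a load scenario here ...

	// Ad-hoc query, then the full per-scenario metric dump.
	resp, err := prom.Query(ctx, `sum(reloader_reload_executed_total{job="reloader"})`)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("query status: %s, series returned: %d", resp.Status, len(resp.Data.Result))

	if err := prom.CollectMetrics(ctx, "reloader", "results/S1", "S1"); err != nil {
		log.Fatal(err)
	}
}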
test/loadtest/internal/scenarios/scenarios.go (new file, 2092 lines)
File diff suppressed because it is too large.