mirror of
https://github.com/bloomberg/goldpinger.git
synced 2026-04-21 09:26:49 +00:00
Add an opt-in UDP echo probe that runs alongside the existing HTTP
ping. Each goldpinger pod listens on a configurable UDP port (default
6969). During each ping cycle, the prober sends N sequenced packets
to the peer's listener, which echoes them back. From the replies we
compute packet loss percentage, path hop count (from IPv4 TTL / IPv6
HopLimit), and average round-trip time.
New Prometheus metrics:
- goldpinger_peers_loss_pct (gauge) — per-peer UDP loss %
- goldpinger_peers_path_length (gauge) — estimated hop count
- goldpinger_peers_udp_rtt_ms (histogram) — UDP RTT in milliseconds
The graph UI shows yellow edges for links with partial loss, and
displays sub-millisecond UDP RTT instead of HTTP latency when UDP
is enabled. Stale metric labels are cleaned up when a pinger is
destroyed so rolled pods don't leave ghost entries.
Configuration (all via env vars, disabled by default):
UDP_ENABLED=true — enable UDP probing and listener
UDP_PORT=6969 — listener port
UDP_PACKET_COUNT=10 — packets per probe
UDP_PACKET_SIZE=64 — bytes per packet
UDP_TIMEOUT=1s — probe timeout
New files:
pkg/goldpinger/udp_probe.go — echo listener + probe client
pkg/goldpinger/udp_probe_test.go — unit tests
Unit tests:
```
=== RUN TestProbeUDP_NoLoss
udp_probe_test.go:51: avg UDP RTT: 0.0823 ms
--- PASS: TestProbeUDP_NoLoss (0.00s)
=== RUN TestProbeUDP_FullLoss
--- PASS: TestProbeUDP_FullLoss (0.00s)
=== RUN TestProbeUDP_PacketFormat
--- PASS: TestProbeUDP_PacketFormat (0.00s)
=== RUN TestEstimateHops
--- PASS: TestEstimateHops (0.00s)
PASS
```
Cluster test (6-node IPv6 k8s, UDP_ENABLED=true):
```
Prometheus metrics (healthy cluster, 0% loss):
goldpinger_peers_loss_pct{...,pod_ip="fd00:4:69:3::3746"} 0
goldpinger_peers_path_length{...,pod_ip="fd00:4:69:3::3746"} 0
Simulated 50% loss via ip6tables DROP in pod netns on node-0:
goldpinger_peers_loss_pct{instance="server",...} 60
goldpinger_peers_loss_pct{instance="node-1",...} 30
goldpinger_peers_loss_pct{instance="server2",...} 30
UDP RTT vs HTTP RTT (check_all API):
node-0 -> server: udp=2.18ms http=2ms
node-2 -> node-2: udp=0.40ms http=1ms
server -> node-0: udp=0.55ms http=2ms
Post-rollout stale metrics cleanup verified:
All 36 edges show 0% loss, no stale pod IPs.
```
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Cooper Ry Lees <me@cooperlees.com>
82 lines
2.1 KiB
Go
82 lines
2.1 KiB
Go
// Code generated by go-swagger; DO NOT EDIT.
|
|
|
|
package operations
|
|
|
|
import (
|
|
"errors"
|
|
"net/url"
|
|
golangswaggerpaths "path"
|
|
)
|
|
|
|
// CheckServicePodsURL is the URL builder for the check service pods operation.
type CheckServicePodsURL struct {
	// _basePath is the optional prefix that Build joins in front of the
	// operation path.
	_basePath string
}
|
|
|
|
// WithBasePath sets the base path for this url builder, only required when it's different from the
|
|
// base path specified in the swagger spec.
|
|
// When the value of the base path is an empty string
|
|
func (o *CheckServicePodsURL) WithBasePath(bp string) *CheckServicePodsURL {
|
|
o.SetBasePath(bp)
|
|
return o
|
|
}
|
|
|
|
// SetBasePath sets the base path for this url builder, only required when it's different from the
|
|
// base path specified in the swagger spec.
|
|
// When the value of the base path is an empty string
|
|
func (o *CheckServicePodsURL) SetBasePath(bp string) {
|
|
o._basePath = bp
|
|
}
|
|
|
|
// Build a url path and query string
|
|
func (o *CheckServicePodsURL) Build() (*url.URL, error) {
|
|
var _result url.URL
|
|
|
|
var _path = "/check"
|
|
|
|
_basePath := o._basePath
|
|
_result.Path = golangswaggerpaths.Join(_basePath, _path)
|
|
|
|
return &_result, nil
|
|
}
|
|
|
|
// Must is a helper function to panic when the url builder returns an error
|
|
func (o *CheckServicePodsURL) Must(u *url.URL, err error) *url.URL {
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
if u == nil {
|
|
panic("url can't be nil")
|
|
}
|
|
return u
|
|
}
|
|
|
|
// String returns the string representation of the path with query string
|
|
func (o *CheckServicePodsURL) String() string {
|
|
return o.Must(o.Build()).String()
|
|
}
|
|
|
|
// BuildFull builds a full url with scheme, host, path and query string
|
|
func (o *CheckServicePodsURL) BuildFull(scheme, host string) (*url.URL, error) {
|
|
if scheme == "" {
|
|
return nil, errors.New("scheme is required for a full url on CheckServicePodsURL")
|
|
}
|
|
if host == "" {
|
|
return nil, errors.New("host is required for a full url on CheckServicePodsURL")
|
|
}
|
|
|
|
base, err := o.Build()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
base.Scheme = scheme
|
|
base.Host = host
|
|
return base, nil
|
|
}
|
|
|
|
// StringFull returns the string representation of a complete url
|
|
func (o *CheckServicePodsURL) StringFull(scheme, host string) string {
|
|
return o.Must(o.BuildFull(scheme, host)).String()
|
|
}
|