Files
goldpinger/pkg/models/check_all_pod_result.go
Cooper Ry Lees 832bc7b598 Add UDP probe metrics: packet loss, hop count, and RTT
Add an opt-in UDP echo probe that runs alongside the existing HTTP
ping. Each goldpinger pod listens on a configurable UDP port (default
6969). During each ping cycle, the prober sends N sequenced packets
to the peer's listener, which echoes them back. From the replies we
compute packet loss percentage, path hop count (from IPv4 TTL / IPv6
HopLimit), and average round-trip time.

New Prometheus metrics:
  - goldpinger_peers_loss_pct      (gauge)     — per-peer UDP loss %
  - goldpinger_peers_path_length   (gauge)     — estimated hop count
  - goldpinger_peers_udp_rtt_ms    (histogram) — UDP RTT in milliseconds

The graph UI shows yellow edges for links with partial loss, and
displays sub-millisecond UDP RTT instead of HTTP latency when UDP
is enabled. Stale metric labels are cleaned up when a pinger is
destroyed so rolled pods don't leave ghost entries.

Configuration (all via env vars, disabled by default):
  UDP_ENABLED=true      enable UDP probing and listener
  UDP_PORT=6969         listener port
  UDP_PACKET_COUNT=10   packets per probe
  UDP_PACKET_SIZE=64    bytes per packet
  UDP_TIMEOUT=1s        probe timeout

New files:
  pkg/goldpinger/udp_probe.go       — echo listener + probe client
  pkg/goldpinger/udp_probe_test.go  — unit tests

Unit tests:
```
=== RUN   TestProbeUDP_NoLoss
    udp_probe_test.go:51: avg UDP RTT: 0.0823 ms
--- PASS: TestProbeUDP_NoLoss (0.00s)
=== RUN   TestProbeUDP_FullLoss
--- PASS: TestProbeUDP_FullLoss (0.00s)
=== RUN   TestProbeUDP_PacketFormat
--- PASS: TestProbeUDP_PacketFormat (0.00s)
=== RUN   TestEstimateHops
--- PASS: TestEstimateHops (0.00s)
PASS
```

Cluster test (6-node IPv6 k8s, UDP_ENABLED=true):
```
Prometheus metrics (healthy cluster, 0% loss):
  goldpinger_peers_loss_pct{...,pod_ip="fd00:4:69:3::3746"} 0
  goldpinger_peers_path_length{...,pod_ip="fd00:4:69:3::3746"} 0

Simulated 50% loss via ip6tables DROP in pod netns on node-0:
  goldpinger_peers_loss_pct{instance="server",...} 60
  goldpinger_peers_loss_pct{instance="node-1",...} 30
  goldpinger_peers_loss_pct{instance="server2",...} 30

UDP RTT vs HTTP RTT (check_all API):
  node-0 -> server:  udp=2.18ms  http=2ms
  node-2 -> node-2:  udp=0.40ms  http=1ms
  server -> node-0:  udp=0.55ms  http=2ms

Post-rollout stale metrics cleanup verified:
  All 36 edges show 0% loss, no stale pod IPs.
```

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Cooper Ry Lees <me@cooperlees.com>
2026-03-27 16:05:32 +00:00

166 lines
3.4 KiB
Go

// Code generated by go-swagger; DO NOT EDIT.
package models
import (
"context"
stderrors "errors"
"github.com/go-openapi/errors"
"github.com/go-openapi/strfmt"
"github.com/go-openapi/swag"
"github.com/go-openapi/validate"
)
// CheckAllPodResult is the swagger model "CheckAllPodResult".
//
// Every field carries the omitempty JSON option, so a field holding its zero
// value is left out of the encoded document. HostIP, PodIP and Response are
// the fields checked by Validate; Error and StatusCode are not validated.
//
// swagger:model CheckAllPodResult
type CheckAllPodResult struct {
// HostIP is the host IP, encoded under the JSON key "HostIP".
// Format: ipv4
// NOTE(review): validateHostIP enforces the "ipv4" format, so a non-IPv4
// value fails validation — confirm this is intended for IPv6 clusters.
HostIP strfmt.IPv4 `json:"HostIP,omitempty"`
// OK is encoded under the JSON key "OK". It is a pointer, so with omitempty
// a nil value is omitted while an explicit false is still emitted.
OK *bool `json:"OK,omitempty"`
// PodIP is the pod IP, encoded under the JSON key "PodIP".
// Format: ipv4
// NOTE(review): validatePodIP enforces the "ipv4" format, so a non-IPv4
// value fails validation — confirm this is intended for IPv6 clusters.
PodIP strfmt.IPv4 `json:"PodIP,omitempty"`
// Error is the error text, encoded under the JSON key "error".
Error string `json:"error,omitempty"`
// Response is the nested CheckResults, encoded under the JSON key
// "response". When non-nil it is validated recursively by Validate and
// ContextValidate.
Response *CheckResults `json:"response,omitempty"`
// StatusCode is the status code, encoded under the JSON key "status-code".
StatusCode int32 `json:"status-code,omitempty"`
}
// Validate validates this check all pod result.
//
// The HostIP, PodIP and response validators run in that order and every
// failure is collected rather than stopping at the first one. The result is
// nil when all of them pass; otherwise the collected errors are combined with
// errors.CompositeValidationError.
func (m *CheckAllPodResult) Validate(formats strfmt.Registry) error {
	validators := []func(strfmt.Registry) error{
		m.validateHostIP,
		m.validatePodIP,
		m.validateResponse,
	}

	var failures []error
	for _, check := range validators {
		if err := check(formats); err != nil {
			failures = append(failures, err)
		}
	}

	if len(failures) == 0 {
		return nil
	}
	return errors.CompositeValidationError(failures...)
}
// validateHostIP checks HostIP against the "ipv4" format using the supplied
// registry. The field is optional: a zero value is accepted without any
// format check.
func (m *CheckAllPodResult) validateHostIP(formats strfmt.Registry) error {
	if !swag.IsZero(m.HostIP) {
		// The result is compared with nil before being returned, instead of
		// being returned directly, so that a nil result of a concrete pointer
		// type can never surface as a non-nil error interface.
		if formatErr := validate.FormatOf("HostIP", "body", "ipv4", m.HostIP.String(), formats); formatErr != nil {
			return formatErr
		}
	}
	return nil
}
// validatePodIP checks PodIP against the "ipv4" format using the supplied
// registry. The field is optional: a zero value is accepted without any
// format check.
func (m *CheckAllPodResult) validatePodIP(formats strfmt.Registry) error {
	if !swag.IsZero(m.PodIP) {
		// The result is compared with nil before being returned, instead of
		// being returned directly, so that a nil result of a concrete pointer
		// type can never surface as a non-nil error interface.
		if formatErr := validate.FormatOf("PodIP", "body", "ipv4", m.PodIP.String(), formats); formatErr != nil {
			return formatErr
		}
	}
	return nil
}
// validateResponse validates the nested Response, if there is one.
//
// A zero or nil Response is accepted as-is because the field is optional.
// When the nested validation fails with an errors.Validation or an
// errors.CompositeError, the error is re-labelled with the field name
// "response" before being returned; any other error is passed through
// unchanged.
func (m *CheckAllPodResult) validateResponse(formats strfmt.Registry) error {
	if swag.IsZero(m.Response) || m.Response == nil { // not required
		return nil
	}

	err := m.Response.Validate(formats)
	if err == nil {
		return nil
	}

	var asValidation *errors.Validation
	if stderrors.As(err, &asValidation) {
		return asValidation.ValidateName("response")
	}

	var asComposite *errors.CompositeError
	if stderrors.As(err, &asComposite) {
		return asComposite.ValidateName("response")
	}

	return err
}
// ContextValidate validates this check all pod result based on the context it
// is used in.
//
// Only the nested response has a context-dependent validator. A failure from
// it is wrapped with errors.CompositeValidationError; nil is returned when it
// passes.
func (m *CheckAllPodResult) ContextValidate(ctx context.Context, formats strfmt.Registry) error {
	if err := m.contextValidateResponse(ctx, formats); err != nil {
		return errors.CompositeValidationError(err)
	}
	return nil
}
// contextValidateResponse runs the context-aware validation of the nested
// Response, if there is one.
//
// A nil or zero Response is accepted as-is because the field is optional.
// When the nested validation fails with an errors.Validation or an
// errors.CompositeError, the error is re-labelled with the field name
// "response" before being returned; any other error is passed through
// unchanged.
func (m *CheckAllPodResult) contextValidateResponse(ctx context.Context, formats strfmt.Registry) error {
	if m.Response == nil {
		return nil
	}
	if swag.IsZero(m.Response) { // not required
		return nil
	}

	err := m.Response.ContextValidate(ctx, formats)
	if err == nil {
		return nil
	}

	var asValidation *errors.Validation
	if stderrors.As(err, &asValidation) {
		return asValidation.ValidateName("response")
	}

	var asComposite *errors.CompositeError
	if stderrors.As(err, &asComposite) {
		return asComposite.ValidateName("response")
	}

	return err
}
// MarshalBinary interface implementation.
//
// The receiver is encoded with swag.WriteJSON. A nil receiver yields a nil
// byte slice and a nil error.
func (m *CheckAllPodResult) MarshalBinary() ([]byte, error) {
	if m != nil {
		return swag.WriteJSON(m)
	}
	return nil, nil
}
// UnmarshalBinary interface implementation.
//
// The bytes are decoded with swag.ReadJSON into a scratch value first, and the
// receiver is overwritten only after decoding succeeds, so a failed decode
// leaves *m untouched.
func (m *CheckAllPodResult) UnmarshalBinary(b []byte) error {
	var decoded CheckAllPodResult

	err := swag.ReadJSON(b, &decoded)
	if err != nil {
		return err
	}

	*m = decoded
	return nil
}