diff --git a/hack/dashboard.yaml b/hack/dashboard.yaml
new file mode 100644
index 0000000..3443d77
--- /dev/null
+++ b/hack/dashboard.yaml
@@ -0,0 +1,94 @@
+title: Wonderwall
+editable: true
+tags: [generated, yaml]
+auto_refresh: 1m
+time: ["now-24h", "now"]
+timezone: default # valid values are: utc, browser, default
+
+# Render to JSON using https://github.com/K-Phoen/grabana v0.17.0 or newer
+# Import into Grafana using UI (remember to select folder)
+
+variables:
+  - custom:
+      name: env
+      default: dev
+      values_map:
+        dev: dev
+        prod: prod
+  - datasource:
+      name: ds
+      type: prometheus
+      regex: $env-gcp
+      include_all: true
+      hide: variable
+  - query:
+      name: redis_op
+      label: Redis Operation
+      datasource: $env-gcp
+      request: "label_values(wonderwall_redis_latency_bucket, operation)"
+      include_all: true
+      default_all: true
+      hide: variable
+
+rows:
+  - name: Resource usage
+    collapse: false
+    panels:
+      - graph:
+          title: Memory usage - $ds
+          datasource: $ds
+          transparent: true
+          span: 4
+          targets:
+            - prometheus:
+                query: avg(kube_pod_container_resource_limits{container="wonderwall",resource="memory"}) by (namespace)
+                legend: "limits in {{ namespace }}"
+            - prometheus:
+                query: avg(kube_pod_container_resource_requests{container="wonderwall",resource="memory"}) by (namespace)
+                legend: "requests in {{ namespace }}"
+            - prometheus:
+                query: sum(container_memory_working_set_bytes{container="wonderwall"}) by (pod, namespace)
+                legend: "working set {{ pod }} in {{ namespace }}"
+            - prometheus:
+                query: sum(container_memory_usage_bytes{container="wonderwall"}) by (pod, namespace)
+                legend: "Resident set size {{ pod }} in {{ namespace }}" # NOTE(review): query is container_memory_usage_bytes, not container_memory_rss - confirm legend
+      - graph:
+          title: CPU usage - $ds
+          datasource: $ds
+          transparent: true
+          span: 4
+          targets:
+            - prometheus:
+                query: avg(kube_pod_container_resource_limits{container="wonderwall",resource="cpu"}) by (namespace)
+                legend: "limits in {{ namespace }}"
+            - prometheus:
+                query: avg(kube_pod_container_resource_requests{container="wonderwall",resource="cpu"}) by (namespace)
+                legend: "requests in {{ namespace }}"
+            - prometheus:
+                query: sum(irate(container_cpu_usage_seconds_total{container="wonderwall"}[2m])) by (pod, namespace)
+                legend: "{{ pod }} in {{ namespace }}"
+  - name: Redis Latency - $redis_op
+    repeat_for: redis_op
+    collapse: true
+    panels:
+      - heatmap:
+          # Must be done manually in Grafana after import: Set max datapoints to 25
+          title: $ds
+          datasource: $ds
+          repeat: ds
+          data_format: time_series_buckets
+          hide_zero_buckets: true
+          transparent: true
+          span: 4
+          tooltip:
+            show: true
+            showhistogram: false
+            decimals: 0
+          yaxis:
+            unit: "dtdurations"
+            decimals: 0
+          targets:
+            - prometheus:
+                query: sum(increase(wonderwall_redis_latency_bucket{operation="$redis_op"}[$__interval])) by (le)
+                legend: "{{ le }}"
+                format: heatmap
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index e7cc41b..b9ef95a 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -1,11 +1,53 @@
 package metrics
 
 import (
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"net/http"
+	"time"
+)
+
+const (
+	// Namespace is the Prometheus namespace used by the metrics declared in this package.
+	Namespace = "wonderwall"
+
+	// RedisOperationLabel is the label name that carries the Redis operation.
+	RedisOperationLabel = "operation"
+)
+
+var (
+	// RedisLatency is a histogram of Redis operation durations in seconds,
+	// partitioned by RedisOperationLabel.
+	RedisLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Name:      "redis_latency",
+		Namespace: Namespace,
+		Help:      "latency in redis operations",
+		Buckets:   prometheus.ExponentialBuckets(0.02, 2, 14),
+	}, []string{RedisOperationLabel})
 )
 
 func Handle(address string) error {
 	handler := promhttp.Handler()
 	return http.ListenAndServe(address, handler)
 }
+
+// Register registers the collectors declared in this package with registry.
+// It uses MustRegister, so a failed registration panics.
+func Register(registry prometheus.Registerer) {
+	registry.MustRegister(
+		RedisLatency,
+	)
+}
+
+// ObserveRedisLatency runs fun, records its elapsed time in seconds in
+// RedisLatency under the given operation label, and returns fun's error.
+// The observation is recorded whether or not fun returns an error.
+func ObserveRedisLatency(operation string, fun func() error) error {
+	timer := time.Now()
+	err := fun()
+	used := time.Since(timer)
+	RedisLatency.With(prometheus.Labels{
+		RedisOperationLabel: operation,
+	}).Observe(used.Seconds())
+	return err
+}
diff --git a/pkg/session/redis.go b/pkg/session/redis.go
index 6963385..cf901b1 100644
--- a/pkg/session/redis.go
+++ b/pkg/session/redis.go
@@ -3,6 +3,7 @@ package session
 import (
 	"context"
 	"github.com/go-redis/redis/v8"
+	"github.com/nais/wonderwall/pkg/metrics"
 	"time"
 )
 
@@ -20,8 +21,9 @@ func NewRedis(client redis.Cmdable) Store {
 
 func (s *redisSessionStore) Read(ctx context.Context, key string) (*Data, error) {
 	data := &Data{}
-	status := s.client.Get(ctx, key)
-	err := status.Scan(data)
+	err := metrics.ObserveRedisLatency("Read", func() error {
+		return s.client.Get(ctx, key).Scan(data)
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -29,11 +31,15 @@ func (s *redisSessionStore) Read(ctx context.Context, key string) (*Data, error)
 }
 
 func (s *redisSessionStore) Write(ctx context.Context, key string, value *Data, expiration time.Duration) error {
-	status := s.client.Set(ctx, key, value, expiration)
-	return status.Err()
+	return metrics.ObserveRedisLatency("Write", func() error {
+		status := s.client.Set(ctx, key, value, expiration)
+		return status.Err()
+	})
 }
 
 func (s *redisSessionStore) Delete(ctx context.Context, keys ...string) error {
-	status := s.client.Del(ctx, keys...)
-	return status.Err()
+	return metrics.ObserveRedisLatency("Delete", func() error {
+		status := s.client.Del(ctx, keys...)
+		return status.Err()
+	})
 }