Mirror of https://github.com/projectcapsule/capsule.git, synced 2026-02-14 09:59:57 +00:00
feat: improve resourcepool monitoring (#1488)
* feat(resourcepools): add improved metrics
* feat(helm): add resourcepool dashboard

Signed-off-by: Oliver Bähler <oliverbaehler@hotmail.com>
Makefile (1 line changed)
@@ -96,6 +96,7 @@ helm-test-exec: ct helm-controller-version ko-build-all
 	$(MAKE) e2e-load-image CLUSTER_NAME=capsule-charts IMAGE=$(CAPSULE_IMG) VERSION=v0.0.0
 	$(MAKE) e2e-load-image CLUSTER_NAME=capsule-charts IMAGE=$(CAPSULE_IMG) VERSION=tracing
 	@$(KUBECTL) create ns capsule-system || true
+	@$(KUBECTL) apply --force-conflicts --server-side=true -f https://github.com/grafana/grafana-operator/releases/download/v5.18.0/crds.yaml
 	@$(KUBECTL) apply --force-conflicts --server-side=true -f https://github.com/cert-manager/cert-manager/releases/download/v1.9.1/cert-manager.crds.yaml
 	@$(KUBECTL) apply --force-conflicts --server-side=true -f https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.58.0/bundle.yaml
 	@$(CT) install --config $(SRC_ROOT)/.github/configs/ct.yaml --namespace=capsule-system --all --debug
@@ -24,6 +24,8 @@ type ResourcePoolStatus struct {
 	Claims ResourcePoolNamespaceClaimsStatus `json:"claims,omitempty"`
 	// Tracks the Usage from Claimed against what has been granted from the pool
 	Allocation ResourcePoolQuotaStatus `json:"allocation,omitempty"`
+	// Exhaustions from claims associated with the pool
+	Exhaustions map[string]api.PoolExhaustionResource `json:"exhaustions,omitempty"`
 }

 type ResourcePoolNamespaceClaimsStatus map[string]ResourcePoolClaimsList
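The new field surfaces directly in the ResourcePool status; a minimal sketch of what a pool with an exhausted resource might report (the resource name and quantities below are illustrative, not taken from this commit):

status:
  allocation:
    hard:
      limits.cpu: "4"
  exhaustions:
    limits.cpu:
      available: "0"
      requesting: "2"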
@@ -887,6 +887,13 @@ func (in *ResourcePoolStatus) DeepCopyInto(out *ResourcePoolStatus) {
 		}
 	}
 	in.Allocation.DeepCopyInto(&out.Allocation)
+	if in.Exhaustions != nil {
+		in, out := &in.Exhaustions, &out.Exhaustions
+		*out = make(map[string]api.PoolExhaustionResource, len(*in))
+		for key, val := range *in {
+			(*out)[key] = *val.DeepCopy()
+		}
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourcePoolStatus.
@@ -178,20 +178,29 @@ Here the values you can override:
 | manager.volumes | list | `[]` | Set the additional volumes needed for the Capsule manager container |
 | manager.webhookPort | int | `9443` | Set an alternative to the default container port. Useful for use in some kubernetes clusters (such as GKE Private) with aggregator routing turned on, because pod ports have to be opened manually on the firewall side |

-### ServiceMonitor Parameters
+### Monitoring Parameters

 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| serviceMonitor.annotations | object | `{}` | Assign additional Annotations |
-| serviceMonitor.enabled | bool | `false` | Enable ServiceMonitor |
-| serviceMonitor.endpoint.interval | string | `"15s"` | Set the scrape interval for the endpoint of the serviceMonitor |
-| serviceMonitor.endpoint.metricRelabelings | list | `[]` | Set metricRelabelings for the endpoint of the serviceMonitor |
-| serviceMonitor.endpoint.relabelings | list | `[]` | Set relabelings for the endpoint of the serviceMonitor |
-| serviceMonitor.endpoint.scrapeTimeout | string | `""` | Set the scrape timeout for the endpoint of the serviceMonitor |
-| serviceMonitor.labels | object | `{}` | Assign additional labels according to Prometheus' serviceMonitorSelector matching labels |
-| serviceMonitor.matchLabels | object | `{}` | Change matching labels |
-| serviceMonitor.namespace | string | `""` | Install the ServiceMonitor into a different Namespace, such as the monitoring stack one (default: the release one) |
-| serviceMonitor.targetLabels | list | `[]` | Set targetLabels for the serviceMonitor |
+| monitoring.dashboards.annotations | object | `{}` | Annotations for dashboard configmaps |
+| monitoring.dashboards.enabled | bool | `false` | Enable Dashboards to be deployed |
+| monitoring.dashboards.labels | object | `{}` | Labels for dashboard configmaps |
+| monitoring.dashboards.namespace | string | `""` | Custom namespace for dashboard configmaps |
+| monitoring.dashboards.operator.allowCrossNamespaceImport | bool | `true` | Allow the Operator to match this resource with Grafanas outside the current namespace |
+| monitoring.dashboards.operator.enabled | bool | `true` | Enable Operator Resources (GrafanaDashboard) |
+| monitoring.dashboards.operator.folder | string | `""` | Folder assignment for the dashboard |
+| monitoring.dashboards.operator.instanceSelector | object | `{}` | Selects Grafana instances for import |
+| monitoring.dashboards.operator.resyncPeriod | string | `"10m"` | How often the resource is synced; defaults to 10m0s if not set |
+| monitoring.serviceMonitor.annotations | object | `{}` | Assign additional Annotations |
+| monitoring.serviceMonitor.enabled | bool | `false` | Enable ServiceMonitor |
+| monitoring.serviceMonitor.endpoint.interval | string | `"15s"` | Set the scrape interval for the endpoint of the serviceMonitor |
+| monitoring.serviceMonitor.endpoint.metricRelabelings | list | `[]` | Set metricRelabelings for the endpoint of the serviceMonitor |
+| monitoring.serviceMonitor.endpoint.relabelings | list | `[]` | Set relabelings for the endpoint of the serviceMonitor |
+| monitoring.serviceMonitor.endpoint.scrapeTimeout | string | `""` | Set the scrape timeout for the endpoint of the serviceMonitor |
+| monitoring.serviceMonitor.labels | object | `{}` | Assign additional labels according to Prometheus' serviceMonitorSelector matching labels |
+| monitoring.serviceMonitor.matchLabels | object | `{}` | Change matching labels |
+| monitoring.serviceMonitor.namespace | string | `""` | Install the ServiceMonitor into a different Namespace, such as the monitoring stack one (default: the release one) |
+| monitoring.serviceMonitor.targetLabels | list | `[]` | Set targetLabels for the serviceMonitor |

 ### Webhooks Parameters
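For chart users this is a rename of the top-level serviceMonitor block; a migration sketch (values illustrative), noting that the template still merges a legacy top-level serviceMonitor block over the new location for backwards compatibility (see the servicemonitor.yaml hunk further down):

# before
serviceMonitor:
  enabled: true
  endpoint:
    interval: 15s

# after
monitoring:
  serviceMonitor:
    enabled: true
    endpoint:
      interval: 15s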
@@ -112,7 +112,7 @@ Here the values you can override:
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 {{- range .Values }}
-{{- if not (or (hasPrefix "global" .Key) (hasPrefix "manager" .Key) (hasPrefix "crds" .Key) (hasPrefix "serviceMonitor" .Key) (hasPrefix "webhook" .Key) (hasPrefix "capsule-proxy" .Key) ) }}
+{{- if not (or (hasPrefix "global" .Key) (hasPrefix "manager" .Key) (hasPrefix "crds" .Key) (hasPrefix "monitoring" .Key) (hasPrefix "webhook" .Key) (hasPrefix "capsule-proxy" .Key) ) }}
 | {{ .Key }} | {{ .Type }} | {{ if .Default }}{{ .Default }}{{ else }}{{ .AutoDefault }}{{ end }} | {{ if .Description }}{{ .Description }}{{ else }}{{ .AutoDescription }}{{ end }} |
 {{- end }}
 {{- end }}
@@ -127,12 +127,12 @@ Here the values you can override:
 {{- end }}
 {{- end }}

-### ServiceMonitor Parameters
+### Monitoring Parameters

 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 {{- range .Values }}
-{{- if hasPrefix "serviceMonitor" .Key }}
+{{- if hasPrefix "monitoring" .Key }}
 | {{ .Key }} | {{ .Type }} | {{ if .Default }}{{ .Default }}{{ else }}{{ .AutoDefault }}{{ end }} | {{ if .Description }}{{ .Description }}{{ else }}{{ .AutoDescription }}{{ end }} |
 {{- end }}
 {{- end }}
charts/capsule/ci/monitoring-values.yaml (new file, 9 lines)
@@ -0,0 +1,9 @@
monitoring:
  dashboards:
    enabled: true
    annotations:
      k8s-sidecar-target-directory: /tmp/dashboards/Capsule
    labels:
      grafana_dashboard: "1"
    operator:
      enabled: true
@@ -291,6 +291,26 @@ spec:
                 type: array
             description: Tracks the quotas for the Resource.
             type: object
+          exhaustions:
+            additionalProperties:
+              properties:
+                available:
+                  anyOf:
+                    - type: integer
+                    - type: string
+                  description: Available Resources to be claimed
+                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                  x-kubernetes-int-or-string: true
+                requesting:
+                  anyOf:
+                    - type: integer
+                    - type: string
+                  description: Requesting Resources
+                  pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                  x-kubernetes-int-or-string: true
+              type: object
+            description: Exhaustions from claims associated with the pool
+            type: object
           namespaceCount:
             default: 0
             description: How many namespaces are considered
charts/capsule/dashboards/resourcepools-dashboard.json (new file, 1590 lines; diff suppressed because it is too large)
charts/capsule/templates/dashboards.yaml (new file, 51 lines)
@@ -0,0 +1,51 @@
{{- if $.Values.monitoring.dashboards.enabled }}
{{ range $path, $_ := .Files.Glob "dashboards/**-dashboard.json" }}
{{- with $ }}
{{- $content := (.Files.Get $path) }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "capsule.fullname" . }}-{{ $path | base | trimSuffix "-dashboard.json" | regexFind "[^_]+$" }}-dashboard
  namespace: {{ default $.Release.Namespace $.Values.monitoring.dashboards.namespace | quote }}
  annotations:
    {{- with $.Values.monitoring.dashboards.annotations }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  labels:
    {{- include "capsule.labels" . | nindent 4 }}
    {{- with $.Values.monitoring.dashboards.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
data:
  {{ base $path }}: |-
    {{- $content | nindent 4 }}

{{- if $.Values.monitoring.dashboards.operator.enabled }}
---
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: {{ include "capsule.fullname" . }}-{{ $path | base | trimSuffix "-dashboard.json" | regexFind "[^_]+$" }}
  namespace: {{ default $.Release.Namespace $.Values.monitoring.dashboards.namespace | quote }}
  annotations:
    {{- with $.Values.monitoring.dashboards.annotations }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  labels:
    {{- include "capsule.labels" . | nindent 4 }}
    {{- with $.Values.monitoring.dashboards.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  configMapRef:
    name: {{ include "capsule.fullname" . }}-{{ $path | base | trimSuffix "-dashboard.json" | regexFind "[^_]+$" }}-dashboard
    key: {{ base $path }}
  {{- with (omit $.Values.monitoring.dashboards.operator "enabled") }}
  {{- toYaml . | nindent 2 }}
  {{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
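The template emits one ConfigMap per bundled dashboard (for sidecar-style import via labels such as grafana_dashboard) and, when the operator integration is enabled, a GrafanaDashboard that points back at it. A sketch of the rendered output for the bundled resourcepools dashboard, assuming a release named capsule in the capsule-system namespace (names derived from the template, values illustrative):

apiVersion: v1
kind: ConfigMap
metadata:
  name: capsule-resourcepools-dashboard
  namespace: "capsule-system"
data:
  resourcepools-dashboard.json: |-
    { ... 1590 lines of dashboard JSON ... }
---
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: capsule-resourcepools
  namespace: "capsule-system"
spec:
  configMapRef:
    name: capsule-resourcepools-dashboard
    key: resourcepools-dashboard.json
  allowCrossNamespaceImport: true
  instanceSelector: {}
  resyncPeriod: 10m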
@@ -1,22 +1,23 @@
 {{- if not $.Values.crds.exclusive }}
-{{- if .Values.serviceMonitor.enabled }}
+{{- with (mergeOverwrite .Values.monitoring.serviceMonitor (default dict .Values.serviceMonitor)) -}}
+{{- if .enabled }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
-  name: {{ include "capsule.fullname" . }}-monitor
-  namespace: {{ .Values.serviceMonitor.namespace | default .Release.Namespace }}
+  name: {{ include "capsule.fullname" $ }}
+  namespace: {{ .namespace | default $.Release.Namespace }}
   labels:
     {{- include "capsule.labels" . | nindent 4 }}
-    {{- with .Values.serviceMonitor.labels }}
+    {{- with .labels }}
     {{- toYaml . | nindent 4 }}
     {{- end }}
-  {{- with .Values.serviceMonitor.annotations }}
+  {{- with .annotations }}
   annotations:
     {{- toYaml . | nindent 4 }}
   {{- end }}
 spec:
   endpoints:
-  {{- with .Values.serviceMonitor.endpoint }}
+  {{- with .endpoint }}
   - interval: {{ .interval }}
     port: metrics
     path: /metrics
@@ -31,18 +32,19 @@ spec:
     {{- end }}
   {{- end }}
   jobLabel: app.kubernetes.io/name
-  {{- with .Values.serviceMonitor.targetLabels }}
+  {{- with .targetLabels }}
   targetLabels: {{- toYaml . | nindent 4 }}
   {{- end }}
   selector:
     matchLabels:
-      {{- if .Values.serviceMonitor.matchLabels }}
-      {{- toYaml .Values.serviceMonitor.matchLabels | nindent 6 }}
+      {{- if .matchLabels }}
+      {{- toYaml .matchLabels | nindent 6 }}
       {{- else }}
-      {{- include "capsule.labels" . | nindent 6 }}
+      {{- include "capsule.selectorLabels" $ | nindent 6 }}
       {{- end }}
   namespaceSelector:
     matchNames:
-      - {{ .Release.Namespace }}
+      - {{ $.Release.Namespace }}
 {{- end }}
 {{- end }}
+{{- end }}
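Because the template builds its context with mergeOverwrite, keys set under the legacy top-level serviceMonitor block overwrite the defaults under monitoring.serviceMonitor, so existing values files keep working; a sketch of the precedence (values illustrative):

monitoring:
  serviceMonitor:
    enabled: false   # new default location
serviceMonitor:
  enabled: true      # legacy location; overwrites the key above after
                     # mergeOverwrite, so the ServiceMonitor is still rendered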
@@ -331,6 +331,94 @@
       },
       "type": "object"
     },
+    "monitoring": {
+      "properties": {
+        "dashboards": {
+          "properties": {
+            "annotations": {
+              "properties": {},
+              "type": "object"
+            },
+            "enabled": {
+              "type": "boolean"
+            },
+            "labels": {
+              "properties": {},
+              "type": "object"
+            },
+            "namespace": {
+              "type": "string"
+            },
+            "operator": {
+              "properties": {
+                "allowCrossNamespaceImport": {
+                  "type": "boolean"
+                },
+                "enabled": {
+                  "type": "boolean"
+                },
+                "folder": {
+                  "type": "string"
+                },
+                "instanceSelector": {
+                  "properties": {},
+                  "type": "object"
+                },
+                "resyncPeriod": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            }
+          },
+          "type": "object"
+        },
+        "serviceMonitor": {
+          "properties": {
+            "annotations": {
+              "properties": {},
+              "type": "object"
+            },
+            "enabled": {
+              "type": "boolean"
+            },
+            "endpoint": {
+              "properties": {
+                "interval": {
+                  "type": "string"
+                },
+                "metricRelabelings": {
+                  "type": "array"
+                },
+                "relabelings": {
+                  "type": "array"
+                },
+                "scrapeTimeout": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            },
+            "labels": {
+              "properties": {},
+              "type": "object"
+            },
+            "matchLabels": {
+              "properties": {},
+              "type": "object"
+            },
+            "namespace": {
+              "type": "string"
+            },
+            "targetLabels": {
+              "type": "array"
+            }
+          },
+          "type": "object"
+        }
+      },
+      "type": "object"
+    },
     "nodeSelector": {
       "properties": {},
       "type": "object"
@@ -452,49 +540,6 @@
       },
       "type": "object"
     },
-    "serviceMonitor": {
-      "properties": {
-        "annotations": {
-          "properties": {},
-          "type": "object"
-        },
-        "enabled": {
-          "type": "boolean"
-        },
-        "endpoint": {
-          "properties": {
-            "interval": {
-              "type": "string"
-            },
-            "metricRelabelings": {
-              "type": "array"
-            },
-            "relabelings": {
-              "type": "array"
-            },
-            "scrapeTimeout": {
-              "type": "string"
-            }
-          },
-          "type": "object"
-        },
-        "labels": {
-          "properties": {},
-          "type": "object"
-        },
-        "matchLabels": {
-          "properties": {},
-          "type": "object"
-        },
-        "namespace": {
-          "type": "string"
-        },
-        "targetLabels": {
-          "type": "array"
-        }
-      },
-      "type": "object"
-    },
     "tls": {
       "properties": {
         "create": {
@@ -377,27 +377,52 @@ webhooks:
         - key: capsule.clastix.io/tenant
           operator: Exists

+# Monitoring Settings
+monitoring:
+
+  # ServiceMonitor
+  serviceMonitor:
+    # -- Enable ServiceMonitor
+    enabled: false
+    # -- Install the ServiceMonitor into a different Namespace, such as the monitoring stack one (default: the release one)
+    namespace: ''
+    # -- Assign additional labels according to Prometheus' serviceMonitorSelector matching labels
+    labels: {}
+    # -- Assign additional Annotations
+    annotations: {}
+    # -- Change matching labels
+    matchLabels: {}
+    # -- Set targetLabels for the serviceMonitor
+    targetLabels: []
+    endpoint:
+      # -- Set the scrape interval for the endpoint of the serviceMonitor
+      interval: "15s"
+      # -- Set the scrape timeout for the endpoint of the serviceMonitor
+      scrapeTimeout: ""
+      # -- Set metricRelabelings for the endpoint of the serviceMonitor
+      metricRelabelings: []
+      # -- Set relabelings for the endpoint of the serviceMonitor
+      relabelings: []
+  dashboards:
+    # -- Enable Dashboards to be deployed
+    enabled: false
+    # -- Annotations for dashboard configmaps
+    annotations: {}
+    # -- Labels for dashboard configmaps
+    labels: {}
+      # grafana_dashboard: "1"
+    # -- Custom namespace for dashboard configmaps
+    namespace: ""
+    # Grafana Operator
+    operator:
+      # -- Enable Operator Resources (GrafanaDashboard)
+      enabled: true
+      # -- Allow the Operator to match this resource with Grafanas outside the current namespace
+      allowCrossNamespaceImport: true
+      # -- How often the resource is synced; defaults to 10m0s if not set
+      resyncPeriod: "10m"
+      # -- Selects Grafana instances for import
+      instanceSelector: {}
+      # -- Folder assignment for the dashboard
+      folder: ""

-# ServiceMonitor
-serviceMonitor:
-  # -- Enable ServiceMonitor
-  enabled: false
-  # -- Install the ServiceMonitor into a different Namespace, such as the monitoring stack one (default: the release one)
-  namespace: ''
-  # -- Assign additional labels according to Prometheus' serviceMonitorSelector matching labels
-  labels: {}
-  # -- Assign additional Annotations
-  annotations: {}
-  # -- Change matching labels
-  matchLabels: {}
-  # -- Set targetLabels for the serviceMonitor
-  targetLabels: []
-  endpoint:
-    # -- Set the scrape interval for the endpoint of the serviceMonitor
-    interval: "15s"
-    # -- Set the scrape timeout for the endpoint of the serviceMonitor
-    scrapeTimeout: ""
-    # -- Set metricRelabelings for the endpoint of the serviceMonitor
-    metricRelabelings: []
-    # -- Set relabelings for the endpoint of the serviceMonitor
-    relabelings: []
@@ -51,9 +51,9 @@ func (r resourceClaimController) Reconcile(ctx context.Context, request ctrl.Req
 	instance := &capsulev1beta2.ResourcePoolClaim{}
 	if err = r.Get(ctx, request.NamespacedName, instance); err != nil {
 		if apierrors.IsNotFound(err) {
-			log.Info("Request object not found, could have been deleted after reconcile request")
+			log.V(5).Info("Request object not found, could have been deleted after reconcile request")

-			r.metrics.DeleteClaimMetric(request.Name)
+			r.metrics.DeleteClaimMetric(request.Name, request.Namespace)

 			return reconcile.Result{}, nil
 		}
@@ -13,7 +13,6 @@ import (
 	"golang.org/x/sync/errgroup"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/types"
@@ -76,7 +75,7 @@ func (r resourcePoolController) Reconcile(ctx context.Context, request ctrl.Requ
 	instance := &capsulev1beta2.ResourcePool{}
 	if err = r.Get(ctx, request.NamespacedName, instance); err != nil {
 		if apierrors.IsNotFound(err) {
-			log.Info("Request object not found, could have been deleted after reconcile request")
+			log.V(5).Info("Request object not found, could have been deleted after reconcile request")

 			r.metrics.DeleteResourcePoolMetric(request.Name)

@@ -198,18 +197,23 @@ func (r *resourcePoolController) reconcile(

 	// Keeps track of resources which are exhausted by previous resource
 	// This is only required when Ordered is active
-	queuedResourcesMap := make(map[string]resource.Quantity)
+	exhaustions := make(map[string]api.PoolExhaustionResource)

 	// You can now iterate over `allClaims` in order
 	for _, claim := range claims {
-		log.Info("Found claim", "name", claim.Name, "namespace", claim.Namespace, "created", claim.CreationTimestamp)
+		log.V(5).Info("Found claim", "name", claim.Name, "namespace", claim.Namespace, "created", claim.CreationTimestamp)

-		err = r.reconcileResourceClaim(ctx, log.WithValues("Claim", claim.Name), pool, &claim, queuedResourcesMap)
+		err = r.reconcileResourceClaim(ctx, log.WithValues("Claim", claim.Name), pool, &claim, exhaustions)
 		if err != nil {
 			log.Error(err, "Failed to reconcile ResourceQuotaClaim", "claim", claim.Name)
 		}
 	}

+	log.V(7).Info("finalized reconciling claims", "exhaustions", exhaustions)
+
+	r.metrics.CalculateExhaustions(pool, exhaustions)
+	pool.Status.Exhaustions = exhaustions
+
 	pool.CalculateClaimedResources()
 	pool.AssignClaims()
@@ -222,7 +226,7 @@ func (r *resourcePoolController) reconcileResourceClaim(
 	log logr.Logger,
 	pool *capsulev1beta2.ResourcePool,
 	claim *capsulev1beta2.ResourcePoolClaim,
-	exhaustion map[string]resource.Quantity,
+	exhaustion map[string]api.PoolExhaustionResource,
 ) (err error) {
 	t := pool.GetClaimFromStatus(claim)
 	if t != nil {
@@ -257,7 +261,6 @@ func (r *resourcePoolController) reconcileResourceClaim(

 	return r.handleClaimResourceExhaustion(
 		ctx,
 		pool,
 		claim,
 		exhaustions,
-		exhaustion,
@@ -271,14 +274,14 @@ func (r *resourcePoolController) canClaimWithinNamespace(
 	log logr.Logger,
 	pool *capsulev1beta2.ResourcePool,
 	claim *capsulev1beta2.ResourcePoolClaim,
-) (res map[string]PoolExhaustionResource) {
+) (res map[string]api.PoolExhaustionResource) {
 	claimable := pool.GetAvailableClaimableResources()
 	log.V(5).Info("claimable resources", "claimable", claimable)

 	_, namespaceClaimed := pool.GetNamespaceClaims(claim.Namespace)
 	log.V(5).Info("namespace claimed resources", "claimed", namespaceClaimed)

-	res = make(map[string]PoolExhaustionResource)
+	res = make(map[string]api.PoolExhaustionResource)

 	for resourceName, req := range claim.Spec.ResourceClaims {
 		// Verify if total Quota is available
@@ -286,10 +289,9 @@ func (r *resourcePoolController) canClaimWithinNamespace(
 		if !exists || available.IsZero() || available.Cmp(req) < 0 {
 			log.V(5).Info("not enough resources available", "available", available, "requesting", req)

-			res[resourceName.String()] = PoolExhaustionResource{
+			res[resourceName.String()] = api.PoolExhaustionResource{
 				Available:  available,
 				Requesting: req,
-				Namespace:  false,
 			}

 			continue
@@ -303,12 +305,12 @@
 func (r *resourcePoolController) handleClaimOrderedExhaustion(
 	ctx context.Context,
 	claim *capsulev1beta2.ResourcePoolClaim,
-	exhaustion map[string]resource.Quantity,
+	exhaustions map[string]api.PoolExhaustionResource,
 ) (queued bool, err error) {
 	status := make([]string, 0)

 	for resourceName, qt := range claim.Spec.ResourceClaims {
-		req, ok := exhaustion[resourceName.String()]
+		req, ok := exhaustions[resourceName.String()]
 		if !ok {
 			continue
 		}
@@ -318,7 +320,7 @@ func (r *resourcePoolController) handleClaimOrderedExhaustion(
 			resourceName,
 			qt.String(),
 			resourceName,
-			req.String(),
+			req.Requesting.String(),
 		)
 		status = append(status, line)
 	}
@@ -339,32 +341,28 @@ func (r *resourcePoolController) handleClaimOrderedExhaustion(

 func (r *resourcePoolController) handleClaimResourceExhaustion(
 	ctx context.Context,
 	pool *capsulev1beta2.ResourcePool,
 	claim *capsulev1beta2.ResourcePoolClaim,
-	exhaustions map[string]PoolExhaustionResource,
-	exhaustion map[string]resource.Quantity,
+	currentExhaustions map[string]api.PoolExhaustionResource,
+	exhaustions map[string]api.PoolExhaustionResource,
 ) (err error) {
 	status := make([]string, 0)

 	resourceNames := make([]string, 0)
-	for resourceName := range exhaustions {
+	for resourceName := range currentExhaustions {
 		resourceNames = append(resourceNames, resourceName)
 	}

 	sort.Strings(resourceNames)

 	for _, resourceName := range resourceNames {
-		ex := exhaustions[resourceName]
+		ex := currentExhaustions[resourceName]

 		if *pool.Spec.Config.OrderedQueue {
-			ext, ok := exhaustion[resourceName]
-			if ok {
-				ext.Add(ex.Requesting)
-			} else {
-				ext = ex.Requesting
-			}
-
-			exhaustion[resourceName] = ext
+			ext, ok := exhaustions[resourceName]
+			if ok {
+				ext.Requesting.Add(ex.Requesting)
+				exhaustions[resourceName] = ext
+			} else {
+				exhaustions[resourceName] = ex
+			}

 			line := fmt.Sprintf(
@@ -465,7 +463,7 @@ func (r *resourcePoolController) handleClaimDisassociation(
 		return nil
 	})
 	if err != nil {
-		log.Info("Removing owner reference failed", "claim", current.Name, "pool", pool.Name, "error", err)
+		log.V(3).Info("Removing owner reference failed", "claim", current.Name, "pool", pool.Name, "error", err)

 		return err
 	}
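The queueing bookkeeping switches from bare resource.Quantity values to api.PoolExhaustionResource, so the requested amounts of queued claims are summed per resource. A minimal standalone sketch of that accumulation pattern (the resource name and quantities are illustrative, and the struct is a local mirror of pkg/api/exhaustion.go):

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// Local mirror of api.PoolExhaustionResource from pkg/api/exhaustion.go.
type PoolExhaustionResource struct {
	Available  resource.Quantity
	Requesting resource.Quantity
}

func main() {
	exhaustions := map[string]PoolExhaustionResource{}

	// Two queued claims, each asking for 2 CPUs while only 1 is still available.
	for _, req := range []string{"2", "2"} {
		ex := PoolExhaustionResource{
			Available:  resource.MustParse("1"),
			Requesting: resource.MustParse(req),
		}

		// Same pattern as handleClaimResourceExhaustion: sum Requesting per resource.
		if ext, ok := exhaustions["limits.cpu"]; ok {
			ext.Requesting.Add(ex.Requesting)
			exhaustions["limits.cpu"] = ext
		} else {
			exhaustions["limits.cpu"] = ex
		}
	}

	fmt.Println(exhaustions["limits.cpu"].Requesting.String()) // prints "4"
}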
@@ -1,16 +0,0 @@
-// Copyright 2020-2023 Project Capsule Authors.
-// SPDX-License-Identifier: Apache-2.0
-
-package resourcepools
-
-import (
-	"k8s.io/apimachinery/pkg/api/resource"
-)
-
-type PoolExhaustion map[string]PoolExhaustionResource
-
-type PoolExhaustionResource struct {
-	Namespace  bool
-	Available  resource.Quantity
-	Requesting resource.Quantity
-}
pkg/api/exhaustion.go (new file, 16 lines)
@@ -0,0 +1,16 @@
// Copyright 2020-2023 Project Capsule Authors.
// SPDX-License-Identifier: Apache-2.0

package api

import (
	"k8s.io/apimachinery/pkg/api/resource"
)

// +kubebuilder:object:generate=true
type PoolExhaustionResource struct {
	// Available Resources to be claimed
	Available resource.Quantity `json:"available,omitempty"`
	// Requesting Resources
	Requesting resource.Quantity `json:"requesting,omitempty"`
}
@@ -287,6 +287,23 @@ func (in *PodOptions) DeepCopy() *PodOptions {
 	return out
 }

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PoolExhaustionResource) DeepCopyInto(out *PoolExhaustionResource) {
+	*out = *in
+	out.Available = in.Available.DeepCopy()
+	out.Requesting = in.Requesting.DeepCopy()
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolExhaustionResource.
+func (in *PoolExhaustionResource) DeepCopy() *PoolExhaustionResource {
+	if in == nil {
+		return nil
+	}
+	out := new(PoolExhaustionResource)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ResourceQuotaSpec) DeepCopyInto(out *ResourceQuotaSpec) {
 	*out = *in
@@ -5,10 +5,10 @@ package metrics

 	"github.com/prometheus/client_golang/prometheus"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	crtlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

 	capsulev1beta2 "github.com/projectcapsule/capsule/api/v1beta2"
-	"github.com/projectcapsule/capsule/pkg/meta"
 )

 type ClaimRecorder struct {
@@ -31,7 +31,7 @@ func NewClaimRecorder() *ClaimRecorder {
 			Name: "claim_condition",
 			Help: "The current condition status of a claim.",
 		},
-		[]string{"name", "target_namespace", "condition", "status", "reason", "pool"},
+		[]string{"name", "target_namespace", "condition", "reason", "pool"},
 	),
 	claimResourcesGauge: prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
@@ -47,26 +47,29 @@ func NewClaimRecorder() *ClaimRecorder {
 func (r *ClaimRecorder) Collectors() []prometheus.Collector {
 	return []prometheus.Collector{
 		r.claimConditionGauge,
 		r.claimResourcesGauge,
 	}
 }

 // RecordCondition records the condition as given for the ref.
 func (r *ClaimRecorder) RecordClaimCondition(claim *capsulev1beta2.ResourcePoolClaim) {
-	for _, status := range []string{meta.AssignedCondition, meta.BoundCondition} {
-		var value float64
-		if status == claim.Status.Condition.Type {
-			value = 1
-		}
+	// Remove all Condition Metrics to avoid duplicates
+	r.claimConditionGauge.DeletePartialMatch(map[string]string{
+		"name":      claim.Name,
+		"namespace": claim.Namespace,
+	})

-		r.claimConditionGauge.WithLabelValues(
-			claim.Name,
-			claim.Namespace,
-			status,
-			string(claim.Status.Condition.Status),
-			claim.Status.Condition.Reason,
-			claim.Status.Pool.Name.String(),
-		).Set(value)
+	value := 0
+	if claim.Status.Condition.Status == metav1.ConditionTrue {
+		value = 1
+	}
+
+	r.claimConditionGauge.WithLabelValues(
+		claim.Name,
+		claim.Namespace,
+		claim.Status.Condition.Type,
+		claim.Status.Condition.Reason,
+		claim.Status.Pool.Name.String(),
+	).Set(float64(value))

 	for resourceName, qt := range claim.Spec.ResourceClaims {
 		r.claimResourcesGauge.WithLabelValues(
@@ -78,8 +81,13 @@ func (r *ClaimRecorder) RecordClaimCondition(claim *capsulev1beta2.ResourcePoolC
 	}

 // DeleteCondition deletes the condition metrics for the ref.
-func (r *ClaimRecorder) DeleteClaimMetric(claim string) {
-	for _, status := range []string{meta.ReadyCondition, meta.NotReadyCondition} {
-		r.claimConditionGauge.DeleteLabelValues(claim, status)
-	}
+func (r *ClaimRecorder) DeleteClaimMetric(claim string, namespace string) {
+	r.claimConditionGauge.DeletePartialMatch(map[string]string{
+		"name":      claim,
+		"namespace": namespace,
+	})
+	r.claimResourcesGauge.DeletePartialMatch(map[string]string{
+		"name":      claim,
+		"namespace": namespace,
+	})
 }
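Claim metric deletion is now scoped by both name and namespace via client_golang's DeletePartialMatch, which removes every series whose labels contain the given subset. A minimal standalone sketch of that behavior (gauge name and label values are illustrative, not the chart's registered metric):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	gauge := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "claim_condition"},
		[]string{"name", "target_namespace", "condition", "reason", "pool"},
	)
	gauge.WithLabelValues("claim-a", "solar-prod", "Bound", "Succeeded", "pool-1").Set(1)
	gauge.WithLabelValues("claim-a", "solar-dev", "Bound", "Succeeded", "pool-1").Set(1)

	// Deletes every series matching the given label subset, regardless of
	// the values of the remaining labels.
	deleted := gauge.DeletePartialMatch(prometheus.Labels{
		"name":             "claim-a",
		"target_namespace": "solar-dev",
	})
	fmt.Println(deleted) // prints 1; the solar-prod series survives
}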
@@ -8,15 +8,19 @@ import (
 	crtlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

 	capsulev1beta2 "github.com/projectcapsule/capsule/api/v1beta2"
+	"github.com/projectcapsule/capsule/pkg/api"
 )

 type ResourcePoolRecorder struct {
-	poolResource               *prometheus.GaugeVec
-	poolResourceLimit          *prometheus.GaugeVec
-	poolResourceAvailable      *prometheus.GaugeVec
-	poolResourceUsage          *prometheus.GaugeVec
-	poolResourceExhaustion     *prometheus.GaugeVec
-	poolNamespaceResourceUsage *prometheus.GaugeVec
+	poolResource                         *prometheus.GaugeVec
+	poolResourceLimit                    *prometheus.GaugeVec
+	poolResourceAvailable                *prometheus.GaugeVec
+	poolResourceUsage                    *prometheus.GaugeVec
+	poolResourceUsagePercentage          *prometheus.GaugeVec
+	poolResourceExhaustion               *prometheus.GaugeVec
+	poolResourceExhaustionPercentage     *prometheus.GaugeVec
+	poolNamespaceResourceUsage           *prometheus.GaugeVec
+	poolNamespaceResourceUsagePercentage *prometheus.GaugeVec
 }

 func MustMakeResourcePoolRecorder() *ResourcePoolRecorder {
@@ -36,6 +40,14 @@ func NewResourcePoolRecorder() *ResourcePoolRecorder {
 			},
 			[]string{"pool", "resource"},
 		),
+		poolResourceExhaustionPercentage: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: metricsPrefix,
+				Name:      "pool_exhaustion_percentage",
+				Help:      "Resources become exhausted, when there's not enough available for all claims and the claims get queued (Percentage)",
+			},
+			[]string{"pool", "resource"},
+		),
 		poolResource: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: metricsPrefix,
@@ -60,7 +72,14 @@ func NewResourcePoolRecorder() *ResourcePoolRecorder {
 			},
 			[]string{"pool", "resource"},
 		),
+		poolResourceUsagePercentage: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: metricsPrefix,
+				Name:      "pool_usage_percentage",
+				Help:      "Current resource usage for a given resource in a resource pool (percentage)",
+			},
+			[]string{"pool", "resource"},
+		),
 		poolResourceAvailable: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Namespace: metricsPrefix,
@@ -77,6 +96,14 @@ func NewResourcePoolRecorder() *ResourcePoolRecorder {
 			},
 			[]string{"pool", "target_namespace", "resource"},
 		),
+		poolNamespaceResourceUsagePercentage: prometheus.NewGaugeVec(
+			prometheus.GaugeOpts{
+				Namespace: metricsPrefix,
+				Name:      "pool_namespace_usage_percentage",
+				Help:      "Current resources claimed on namespace basis for a given resource in a resource pool for a specific namespace (percentage)",
+			},
+			[]string{"pool", "target_namespace", "resource"},
+		),
 	}
 }
@@ -85,9 +112,12 @@ func (r *ResourcePoolRecorder) Collectors() []prometheus.Collector {
 		r.poolResource,
 		r.poolResourceLimit,
 		r.poolResourceUsage,
+		r.poolResourceUsagePercentage,
 		r.poolResourceAvailable,
 		r.poolResourceExhaustion,
+		r.poolResourceExhaustionPercentage,
 		r.poolNamespaceResourceUsage,
+		r.poolNamespaceResourceUsagePercentage,
 	}
 }
@@ -124,11 +154,57 @@ func (r *ResourcePoolRecorder) ResourceUsageMetrics(pool *capsulev1beta2.Resourc
 			pool.Name,
 			resourceName.String(),
 		).Set(float64(available.MilliValue()) / 1000)
+
+		usagePercentage := float64(0)
+		if quantity.MilliValue() > 0 {
+			usagePercentage = (float64(claimed.MilliValue()) / float64(quantity.MilliValue())) * 100
+		}
+
+		r.poolResourceUsagePercentage.WithLabelValues(
+			pool.Name,
+			resourceName.String(),
+		).Set(usagePercentage)
 	}

 	r.resourceUsageMetricsByNamespace(pool)
 }

+// Emit exhaustion metrics
+func (r *ResourcePoolRecorder) CalculateExhaustions(
+	pool *capsulev1beta2.ResourcePool,
+	current map[string]api.PoolExhaustionResource,
+) {
+	for resource := range pool.Status.Exhaustions {
+		if _, ok := current[resource]; ok {
+			continue
+		}
+
+		r.poolResourceExhaustion.DeleteLabelValues(pool.Name, resource)
+		r.poolResourceExhaustionPercentage.DeleteLabelValues(pool.Name, resource)
+	}
+
+	for resource, ex := range current {
+		available := float64(ex.Available.MilliValue()) / 1000
+		requesting := float64(ex.Requesting.MilliValue()) / 1000
+
+		r.poolResourceExhaustion.WithLabelValues(
+			pool.Name,
+			resource,
+		).Set(float64(ex.Requesting.MilliValue()) / 1000)
+
+		// Calculate and expose overprovisioning percentage
+		if available > 0 && requesting > available {
+			percent := ((requesting - available) / available) * 100
+			r.poolResourceExhaustionPercentage.WithLabelValues(
+				pool.Name,
+				resource,
+			).Set(percent)
+		} else {
+			r.poolResourceExhaustionPercentage.DeleteLabelValues(pool.Name, resource)
+		}
+	}
+}
+
 // Delete all metrics for a namespace in a resource pool.
 func (r *ResourcePoolRecorder) DeleteResourcePoolNamespaceMetric(pool string, namespace string) {
 	r.poolNamespaceResourceUsage.DeletePartialMatch(map[string]string{"pool": pool, "namespace": namespace})
@@ -147,7 +223,9 @@ func (r *ResourcePoolRecorder) cleanupAllMetricForLabels(labels map[string]strin
 	r.poolResourceLimit.DeletePartialMatch(labels)
 	r.poolResourceAvailable.DeletePartialMatch(labels)
 	r.poolResourceUsage.DeletePartialMatch(labels)
+	r.poolResourceUsagePercentage.DeletePartialMatch(labels)
 	r.poolNamespaceResourceUsage.DeletePartialMatch(labels)
+	r.poolNamespaceResourceUsagePercentage.DeletePartialMatch(labels)
 	r.poolResource.DeletePartialMatch(labels)
 	r.poolResourceExhaustion.DeletePartialMatch(labels)
 }
@@ -163,6 +241,17 @@ func (r *ResourcePoolRecorder) resourceUsageMetricsByNamespace(pool *capsulev1be
 				namespace,
 				resourceName.String(),
 			).Set(float64(quantity.MilliValue()) / 1000)
+
+			availble, ok := pool.Status.Allocation.Hard[resourceName]
+			if !ok {
+				continue
+			}
+
+			r.poolNamespaceResourceUsagePercentage.WithLabelValues(
+				pool.Name,
+				namespace,
+				resourceName.String(),
+			).Set((float64(quantity.MilliValue()) / float64(availble.MilliValue())) * 100)
 		}
 	}
 }
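The new percentage gauges make threshold alerting straightforward. A hedged PrometheusRule sketch: the capsule_ metric prefix and the thresholds below are assumptions (the prefix comes from the recorder's metricsPrefix constant, which is not shown in this diff), so check the controller's /metrics endpoint for the exact series names:

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: capsule-resourcepools
spec:
  groups:
    - name: capsule-resourcepools
      rules:
        - alert: ResourcePoolNearlyExhausted
          # pool_usage_percentage: current usage of a pool resource, 0-100
          expr: capsule_pool_usage_percentage > 90
          for: 15m
          labels:
            severity: warning
        - alert: ResourcePoolOverclaimed
          # pool_exhaustion_percentage: how far queued claims overshoot
          # what is still available in the pool
          expr: capsule_pool_exhaustion_percentage > 0
          for: 5m
          labels:
            severity: critical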