mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-02-14 10:19:54 +00:00
feat: cgroups host collector (#1581)
Linux control groups host collector that detects whether the specified mountPoint is a cgroup filesystem and, if so, which version it is. The collector also collects information about the configured cgroup controllers. Signed-off-by: Evans Mungai <evans@replicated.com>
This commit is contained in:
@@ -1240,6 +1240,15 @@ spec:
|
||||
required:
|
||||
- paths
|
||||
type: object
|
||||
cgroups:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
mountPoint:
|
||||
type: string
|
||||
type: object
|
||||
copy:
|
||||
properties:
|
||||
collectorName:
|
||||
|
||||
@@ -1240,6 +1240,15 @@ spec:
|
||||
required:
|
||||
- paths
|
||||
type: object
|
||||
cgroups:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
mountPoint:
|
||||
type: string
|
||||
type: object
|
||||
copy:
|
||||
properties:
|
||||
collectorName:
|
||||
|
||||
@@ -19839,6 +19839,15 @@ spec:
|
||||
required:
|
||||
- paths
|
||||
type: object
|
||||
cgroups:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
mountPoint:
|
||||
type: string
|
||||
type: object
|
||||
copy:
|
||||
properties:
|
||||
collectorName:
|
||||
|
||||
8
go.mod
8
go.mod
@@ -8,6 +8,8 @@ require (
|
||||
github.com/ahmetalpbalkan/go-cursor v0.0.0-20131010032410-8136607ea412
|
||||
github.com/apparentlymart/go-cidr v1.1.0
|
||||
github.com/blang/semver/v4 v4.0.0
|
||||
github.com/cilium/ebpf v0.11.0
|
||||
github.com/containerd/cgroups/v3 v3.0.3
|
||||
github.com/containers/image/v5 v5.31.1
|
||||
github.com/distribution/distribution/v3 v3.0.0-beta.1
|
||||
github.com/fatih/color v1.17.0
|
||||
@@ -68,9 +70,9 @@ require (
|
||||
github.com/Masterminds/squirrel v1.5.4 // indirect
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
|
||||
github.com/chai2010/gettext-go v1.0.2 // indirect
|
||||
github.com/containerd/cgroups/v3 v3.0.3 // indirect
|
||||
github.com/containerd/errdefs v0.1.0 // indirect
|
||||
github.com/containerd/log v0.1.0 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/docker/distribution v2.8.3+incompatible // indirect
|
||||
@@ -207,7 +209,7 @@ require (
|
||||
github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d // indirect
|
||||
github.com/nwaples/rardecode v1.1.2 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/runtime-spec v1.2.0 // indirect
|
||||
github.com/opencontainers/runtime-spec v1.2.0
|
||||
github.com/opencontainers/selinux v1.11.0 // indirect
|
||||
github.com/ostreedev/ostree-go v0.0.0-20210805093236-719684c64e4f // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
@@ -235,7 +237,7 @@ require (
|
||||
golang.org/x/crypto v0.25.0 // indirect
|
||||
golang.org/x/net v0.27.0
|
||||
golang.org/x/oauth2 v0.20.0 // indirect
|
||||
golang.org/x/sys v0.22.0 // indirect
|
||||
golang.org/x/sys v0.22.0
|
||||
golang.org/x/term v0.22.0 // indirect
|
||||
golang.org/x/text v0.16.0
|
||||
golang.org/x/time v0.5.0 // indirect
|
||||
|
||||
5
go.sum
5
go.sum
@@ -274,6 +274,8 @@ github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObk
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04=
|
||||
github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
|
||||
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
|
||||
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
|
||||
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
|
||||
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
|
||||
github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
|
||||
@@ -415,6 +417,7 @@ github.com/gobuffalo/packr/v2 v2.8.3 h1:xE1yzvnO56cUC0sTpKR3DIbxZgB54AftTFMhB2XE
|
||||
github.com/gobuffalo/packr/v2 v2.8.3/go.mod h1:0SahksCVcx4IMnigTjiFuyldmTrdTctXsOdiU5KwbKc=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
|
||||
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
@@ -962,6 +965,8 @@ go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lI
|
||||
go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM=
|
||||
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY=
|
||||
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds=
|
||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
|
||||
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
|
||||
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
|
||||
|
||||
@@ -76,6 +76,11 @@ type HostCopy struct {
|
||||
Path string `json:"path" yaml:"path"`
|
||||
}
|
||||
|
||||
type HostCGroups struct {
|
||||
HostCollectorMeta `json:",inline" yaml:",inline"`
|
||||
MountPoint string `json:"mountPoint,omitempty" yaml:"mountPoint,omitempty"`
|
||||
}
|
||||
|
||||
type HostTime struct {
|
||||
HostCollectorMeta `json:",inline" yaml:",inline"`
|
||||
}
|
||||
@@ -219,8 +224,11 @@ type HostCollect struct {
|
||||
HostRun *HostRun `json:"run,omitempty" yaml:"run,omitempty"`
|
||||
HostCopy *HostCopy `json:"copy,omitempty" yaml:"copy,omitempty"`
|
||||
HostKernelConfigs *HostKernelConfigs `json:"kernelConfigs,omitempty" yaml:"kernelConfigs,omitempty"`
|
||||
HostCGroups *HostCGroups `json:"cgroups,omitempty" yaml:"cgroups,omitempty"`
|
||||
}
|
||||
|
||||
// GetName gets the name of the collector
|
||||
// Deprecated: This function is not used anywhere and should be removed. Do not use it.
|
||||
func (c *HostCollect) GetName() string {
|
||||
// TODO: Is this used anywhere? Should we just remove it?
|
||||
var collector string
|
||||
|
||||
@@ -1906,6 +1906,22 @@ func (in *HostBlockDevices) DeepCopy() *HostBlockDevices {
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *HostCGroups) DeepCopyInto(out *HostCGroups) {
|
||||
*out = *in
|
||||
in.HostCollectorMeta.DeepCopyInto(&out.HostCollectorMeta)
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostCGroups.
|
||||
func (in *HostCGroups) DeepCopy() *HostCGroups {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(HostCGroups)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *HostCertificatesCollection) DeepCopyInto(out *HostCertificatesCollection) {
|
||||
*out = *in
|
||||
@@ -2077,6 +2093,11 @@ func (in *HostCollect) DeepCopyInto(out *HostCollect) {
|
||||
*out = new(HostKernelConfigs)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.HostCGroups != nil {
|
||||
in, out := &in.HostCGroups, &out.HostCGroups
|
||||
*out = new(HostCGroups)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostCollect.
|
||||
|
||||
99
pkg/collect/host_cgroup.go
Normal file
99
pkg/collect/host_cgroup.go
Normal file
@@ -0,0 +1,99 @@
|
||||
// This Control Groups collector is heavily based on k0s'
|
||||
// probes implementation https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go
|
||||
|
||||
package collect
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
const hostCGroupsPath = `host-collectors/system/cgroups.json`
|
||||
|
||||
type CollectHostCGroups struct {
|
||||
hostCollector *troubleshootv1beta2.HostCGroups
|
||||
BundlePath string
|
||||
}
|
||||
|
||||
// cgroupResult describes what was discovered for a single cgroup version.
type cgroupResult struct {
	// Enabled is true when this cgroup version was detected.
	Enabled bool `json:"enabled"`
	// MountPoint is the path that was inspected.
	MountPoint string `json:"mountPoint"`
	// Controllers lists the controllers found for this cgroup version.
	Controllers []string `json:"controllers"`
}

// cgroupsResult is the document the collector serializes to JSON.
type cgroupsResult struct {
	// CGroupEnabled is true when either cgroup version is enabled.
	CGroupEnabled bool `json:"cgroup-enabled"`
	// CGroupV1 holds the cgroup v1 findings.
	CGroupV1 cgroupResult `json:"cgroup-v1"`
	// CGroupV2 holds the cgroup v2 findings.
	CGroupV2 cgroupResult `json:"cgroup-v2"`
	// AllControllers is a list of all cgroup controllers found in the system
	AllControllers []string `json:"allControllers"`
}
|
||||
|
||||
func (c *CollectHostCGroups) Title() string {
|
||||
return hostCollectorTitleOrDefault(c.hostCollector.HostCollectorMeta, "cgroups")
|
||||
}
|
||||
|
||||
func (c *CollectHostCGroups) IsExcluded() (bool, error) {
|
||||
return isExcluded(c.hostCollector.Exclude)
|
||||
}
|
||||
|
||||
func (c *CollectHostCGroups) Collect(progressChan chan<- interface{}) (map[string][]byte, error) {
|
||||
// https://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// Implementation is based on https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go
|
||||
|
||||
if c.hostCollector.MountPoint == "" {
|
||||
c.hostCollector.MountPoint = "/sys/fs/cgroup"
|
||||
}
|
||||
|
||||
results, err := discoverConfiguration(c.hostCollector.MountPoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Save the results
|
||||
resultsJson, err := json.MarshalIndent(results, "", " ")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
output := NewResult()
|
||||
err = output.SaveResult(c.BundlePath, hostCGroupsPath, bytes.NewBuffer(resultsJson))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
func parseV1ControllerNames(r io.Reader) ([]string, error) {
|
||||
names := []string{}
|
||||
var lineNo uint
|
||||
lines := bufio.NewScanner(r)
|
||||
for lines.Scan() {
|
||||
lineNo = lineNo + 1
|
||||
if err := lines.Err(); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse /proc/cgroups at line %d: %w ", lineNo, err)
|
||||
}
|
||||
text := lines.Text()
|
||||
if len(text) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
if len(parts) >= 4 && parts[3] != "0" {
|
||||
names = append(names, parts[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
klog.V(2).Info("cgroup v1 controllers loaded")
|
||||
|
||||
return names, nil
|
||||
}
|
||||
330
pkg/collect/host_cgroup_linux.go
Normal file
330
pkg/collect/host_cgroup_linux.go
Normal file
@@ -0,0 +1,330 @@
|
||||
// This Control Groups collector is heavily based on k0s'
|
||||
// probes implementation https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go
|
||||
|
||||
//go:build linux
|
||||
|
||||
package collect
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/cilium/ebpf/rlimit"
|
||||
"github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/ptr"
|
||||
)
|
||||
|
||||
func discoverConfiguration(mountPoint string) (cgroupsResult, error) {
|
||||
results := cgroupsResult{}
|
||||
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs(mountPoint, &st); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
klog.V(2).Infof("no file system mounted at %q", mountPoint)
|
||||
return results, nil
|
||||
}
|
||||
|
||||
return results, fmt.Errorf("failed to stat %q: %w", mountPoint, err)
|
||||
}
|
||||
|
||||
switch st.Type {
|
||||
case unix.CGROUP2_SUPER_MAGIC:
|
||||
klog.V(2).Infof("cgroup v2 mounted at %q", mountPoint)
|
||||
// Discover cgroup2 and controllers enabled
|
||||
// https://www.kernel.org/doc/html/v5.16/admin-guide/cgroup-v2.html#mounting
|
||||
v, err := discoverV2Configuration(mountPoint)
|
||||
if err != nil {
|
||||
return results, fmt.Errorf("failed to discover cgroup v2 configuration from %s mount point: %w", mountPoint, err)
|
||||
}
|
||||
results.CGroupV2 = v
|
||||
case unix.CGROUP_SUPER_MAGIC, unix.TMPFS_MAGIC:
|
||||
klog.V(2).Infof("cgroup v1 mounted at %q", mountPoint)
|
||||
// Discover cgroup1 and controllers enabled
|
||||
// https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/cgroups.7?h=man-pages-5.13#n159
|
||||
// https://www.kernel.org/doc/html/v5.16/admin-guide/cgroup-v1/cgroups.html#how-do-i-use-cgroups
|
||||
r, err := discoverV1Configuration(mountPoint)
|
||||
if err != nil {
|
||||
return results, fmt.Errorf("failed to discover cgroup v1 configuration from %s mount point: %w", mountPoint, err)
|
||||
}
|
||||
results.CGroupV1 = r
|
||||
default:
|
||||
return results, fmt.Errorf("unexpected file system type of %q: 0x%x", mountPoint, st.Type)
|
||||
}
|
||||
|
||||
// If cgroup1 or cgroup2 is enabled
|
||||
results.CGroupEnabled = results.CGroupV1.Enabled || results.CGroupV2.Enabled
|
||||
|
||||
// Sort controllers for consistent output
|
||||
if len(results.CGroupV1.Controllers) > 0 {
|
||||
sort.Strings(results.CGroupV1.Controllers)
|
||||
} else {
|
||||
results.CGroupV1.Controllers = []string{}
|
||||
}
|
||||
if len(results.CGroupV2.Controllers) > 0 {
|
||||
sort.Strings(results.CGroupV2.Controllers)
|
||||
} else {
|
||||
results.CGroupV2.Controllers = []string{}
|
||||
}
|
||||
|
||||
// Combine all controllers
|
||||
set := make(map[string]struct{})
|
||||
for _, c := range results.CGroupV1.Controllers {
|
||||
set[c] = struct{}{}
|
||||
}
|
||||
|
||||
for _, c := range results.CGroupV2.Controllers {
|
||||
set[c] = struct{}{}
|
||||
}
|
||||
|
||||
for c := range set {
|
||||
results.AllControllers = append(results.AllControllers, c)
|
||||
}
|
||||
sort.Strings(results.AllControllers)
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func discoverV1Configuration(mountPoint string) (cgroupResult, error) {
|
||||
res := cgroupResult{}
|
||||
// Get the available controllers from /proc/cgroups.
|
||||
// See https://www.man7.org/linux/man-pages/man7/cgroups.7.html#NOTES
|
||||
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to open /proc/cgroups: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
names, err := parseV1ControllerNames(f)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
res.Enabled = true
|
||||
res.Controllers = names
|
||||
res.MountPoint = mountPoint
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func discoverV2Configuration(mountPoint string) (cgroupResult, error) {
|
||||
res := cgroupResult{}
|
||||
|
||||
// Detect all the listed root controllers.
|
||||
controllers, err := detectV2Controllers(mountPoint)
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
|
||||
res.Enabled = true
|
||||
res.Controllers = controllers
|
||||
res.MountPoint = mountPoint
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Detects all the listed root controllers.
|
||||
//
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
|
||||
func detectV2Controllers(mountPoint string) ([]string, error) {
|
||||
root, err := cgroup2.Load("/", cgroup2.WithMountpoint(mountPoint))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load root cgroup: %w", err)
|
||||
}
|
||||
|
||||
// Load root controllers
|
||||
controllerNames, err := root.RootControllers() // This reads cgroup.controllers
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list cgroup root controllers: %w", err)
|
||||
}
|
||||
|
||||
for _, c := range controllerNames {
|
||||
if c == "cpu" {
|
||||
// If the cpu controller is enabled, the cpuacct controller is also enabled.
|
||||
// This controller succeeded v1's cpuacct and cpu controllers.
|
||||
// https://www.man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
controllerNames = append(controllerNames, "cpuacct")
|
||||
}
|
||||
}
|
||||
|
||||
// Detect freezer controller
|
||||
if detectV2FreezerController(mountPoint) {
|
||||
controllerNames = append(controllerNames, "freezer")
|
||||
}
|
||||
|
||||
// Detect devices controller
|
||||
if detectV2DevicesController(mountPoint) {
|
||||
controllerNames = append(controllerNames, "devices")
|
||||
}
|
||||
|
||||
return controllerNames, nil
|
||||
}
|
||||
|
||||
// Detects the device controller by trying to attach a dummy program of type
|
||||
// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files
|
||||
// and is implemented purely on top of BPF, this is the only reliable way to
|
||||
// detect it. A best-guess detection via the kernel version has the major
|
||||
// drawback of not working with kernels that have a lot of backported features,
|
||||
// such as RHEL and friends.
|
||||
//
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
|
||||
func detectV2DevicesController(mountPoint string) bool {
|
||||
err := attachDummyDeviceFilter(mountPoint)
|
||||
switch {
|
||||
case err == nil:
|
||||
klog.V(2).Info("eBPF device filter program successfully attached")
|
||||
return true
|
||||
// EACCES occurs when not allowed to create cgroups.
|
||||
// EPERM occurs when not allowed to load eBPF programs.
|
||||
case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0:
|
||||
// Insufficient permissions. Loading the eBPF program requires elevated permissions
|
||||
return true
|
||||
case errors.Is(err, unix.EROFS):
|
||||
// Read-only file system detected when trying to create a temporary cgroup
|
||||
return true
|
||||
case eBPFProgramUnsupported(err):
|
||||
klog.V(2).Info("eBPF device filter program is unsupported by the kernel")
|
||||
return false
|
||||
}
|
||||
|
||||
klog.V(2).Infof("failed to attach eBPF device filter program: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created
|
||||
// cgroup and removes the program and cgroup again.
|
||||
func attachDummyDeviceFilter(mountPoint string) (err error) {
|
||||
insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{
|
||||
Allow: true,
|
||||
Type: "a",
|
||||
Major: ptr.To(int64(-1)),
|
||||
Minor: ptr.To(int64(-1)),
|
||||
Access: "rwm",
|
||||
}})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create eBPF device filter program: %w", err)
|
||||
}
|
||||
|
||||
tmpCgroupPath, err := os.MkdirTemp(mountPoint, "troubleshoot-devices-detection-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create temporary cgroup: %w", err)
|
||||
}
|
||||
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()
|
||||
|
||||
dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err})
|
||||
}
|
||||
defer func() {
|
||||
if closeErr := unix.Close(dirFD); closeErr != nil {
|
||||
err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr})
|
||||
}
|
||||
}()
|
||||
|
||||
close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
|
||||
if err != nil {
|
||||
// RemoveMemlock may be required on kernels < 5.11
|
||||
// observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l
|
||||
// https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360
|
||||
if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") {
|
||||
if err2 := rlimit.RemoveMemlock(); err2 != nil {
|
||||
err = errors.Join(err, err2)
|
||||
} else {
|
||||
// Try again, MEMLOCK should be removed by now.
|
||||
close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
|
||||
if err2 != nil {
|
||||
err = errors.Join(err, err2)
|
||||
} else {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
if eBPFProgramUnsupported(err) {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err)
|
||||
}
|
||||
|
||||
return close()
|
||||
}
|
||||
|
||||
// Returns true if the given error indicates that an eBPF program is unsupported
|
||||
// by the kernel.
|
||||
func eBPFProgramUnsupported(err error) bool {
|
||||
// https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49
|
||||
|
||||
switch {
|
||||
// EINVAL occurs when attempting to create a program with an unknown type.
|
||||
case errors.Is(err, unix.EINVAL):
|
||||
return true
|
||||
|
||||
// E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of
|
||||
// the struct known by the running kernel, meaning the kernel is too old to
|
||||
// support the given prog type.
|
||||
case errors.Is(err, unix.E2BIG):
|
||||
return true
|
||||
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Detect the freezer controller. It doesn't appear in the cgroup.controllers
|
||||
// file. Check for the existence of the cgroup.freeze file in the troubleshoot cgroup
|
||||
// instead, or try to create a dummy cgroup if troubleshoot runs in the root cgroup.
|
||||
//
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
|
||||
func detectV2FreezerController(mountPoint string) bool {
|
||||
|
||||
// Detect the freezer controller by checking troubleshoot's cgroup for the existence
|
||||
// of the cgroup.freeze file.
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#processes
|
||||
cgroupPath, err := cgroup2.NestedGroupPath("")
|
||||
if err != nil {
|
||||
klog.V(2).Infof(fmt.Errorf("failed to get troubleshoot cgroup: %w", err).Error())
|
||||
return false
|
||||
}
|
||||
|
||||
if cgroupPath != "/" {
|
||||
cgroupPath = filepath.Join(mountPoint, cgroupPath)
|
||||
} else { // The root cgroup cannot be frozen. Try to create a dummy cgroup.
|
||||
tmpCgroupPath, err := os.MkdirTemp(mountPoint, "troubleshoot-freezer-detection-*")
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrPermission) && os.Geteuid() != 0 {
|
||||
// Insufficient permissions. Creating a cgroup requires elevated permissions
|
||||
klog.V(2).Info("insufficient permissions to create temporary cgroup")
|
||||
}
|
||||
if errors.Is(err, unix.EROFS) && os.Geteuid() != 0 {
|
||||
klog.V(2).Info("read-only file system detected when trying to create a temporary cgroup")
|
||||
}
|
||||
|
||||
klog.V(2).Infof("failed to create temporary cgroup: %v", err)
|
||||
return false
|
||||
}
|
||||
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()
|
||||
cgroupPath = tmpCgroupPath
|
||||
}
|
||||
|
||||
// Check if the cgroup.freeze exists
|
||||
if stat, err := os.Stat(filepath.Join(cgroupPath, "cgroup.freeze")); (err == nil && stat.IsDir()) || os.IsNotExist(err) {
|
||||
klog.V(2).Infof("cgroup.freeze exists at %q", cgroupPath)
|
||||
return false
|
||||
} else if err != nil {
|
||||
klog.V(2).Infof("failed to check for cgroup.freeze at %q: %v", cgroupPath, err)
|
||||
return false
|
||||
}
|
||||
|
||||
klog.V(2).Infof("cgroup.freeze exists at %q", cgroupPath)
|
||||
return true
|
||||
}
|
||||
11
pkg/collect/host_cgroup_others.go
Normal file
11
pkg/collect/host_cgroup_others.go
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build !linux
|
||||
|
||||
package collect
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func discoverConfiguration(_ string) (cgroupsResult, error) {
|
||||
return cgroupsResult{}, fmt.Errorf("Discovery of cgroups not inimplemented for this OS")
|
||||
}
|
||||
60
pkg/collect/host_cgroup_test.go
Normal file
60
pkg/collect/host_cgroup_test.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package collect
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_parseV1ControllerNames(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
subsystems []byte
|
||||
want []string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "no controllers",
|
||||
subsystems: []byte(""),
|
||||
want: []string{},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "multiple enabled controllers",
|
||||
subsystems: []byte(
|
||||
`
|
||||
#subsys_name hierarchy num_cgroups enabled
|
||||
cpuset 5 1
|
||||
cpu 9 41 1
|
||||
cpuacct 9 41 1
|
||||
blkio 11 41 1
|
||||
memory 8 95 0
|
||||
devices 13 41 1
|
||||
freezer 3 2 1
|
||||
net_cls 4 1 1
|
||||
perf_event 2 1 0
|
||||
net_prio 4 1 0
|
||||
hugetlb 12 1 1
|
||||
pids 10 46 1
|
||||
rdma 6 1 0
|
||||
misc 7 1 0
|
||||
`),
|
||||
want: []string{"cpu", "cpuacct", "blkio", "devices", "freezer", "net_cls", "hugetlb", "pids"},
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r := bytes.NewReader(tt.subsystems)
|
||||
|
||||
got, err := parseV1ControllerNames(r)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("parseV1ControllerNames() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("parseV1ControllerNames() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -63,6 +63,8 @@ func GetHostCollector(collector *troubleshootv1beta2.HostCollect, bundlePath str
|
||||
return &CollectHostCopy{collector.HostCopy, bundlePath}, true
|
||||
case collector.HostKernelConfigs != nil:
|
||||
return &CollectHostKernelConfigs{collector.HostKernelConfigs, bundlePath}, true
|
||||
case collector.HostCGroups != nil:
|
||||
return &CollectHostCGroups{collector.HostCGroups, bundlePath}, true
|
||||
default:
|
||||
return nil, false
|
||||
}
|
||||
|
||||
@@ -18816,6 +18816,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"cgroups": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"collectorName": {
|
||||
"type": "string"
|
||||
},
|
||||
"exclude": {
|
||||
"oneOf": [{"type": "string"},{"type": "boolean"}]
|
||||
},
|
||||
"mountPoint": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"copy": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
Reference in New Issue
Block a user