mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-04-15 07:16:34 +00:00
* Add collect command and remote host collectors
Adds the ability to run a host collector on a set of remote k8s nodes.
Target nodes can be filtered using the --selector flag, with the same
syntax as kubectl. Existing flags for --collector-image,
--collector-pullpolicy and --request-timeout are used. To run on a
specified node, --selector="kubernetes.io/hostname=kind-worker2" could
be used.
The collect command is used by the remote collector to output the
results using a "raw" format, which uses the filename as the key, and
the value the output as an escaped JSON string. When run manually it
defaults to fully decoded json. The existing block devices,
ipv4interfaces and services host collectors don't decode properly - the
fix is to convert their slice output to a map (fix not included as
unsure what depends on the existing format).
The collect command is also useful for troubleshooting preflight issues.
Examples are included to show remote collector usage.
```
bin/collect --collector-image=croomes/troubleshoot:latest examples/collect/remote/memory.yaml --namespace test
{
"kind-control-plane": {
"system/memory.json": {
"total": 1304207360
}
},
"kind-worker": {
"system/memory.json": {
"total": 1695780864
}
},
"kind-worker2": {
"system/memory.json": {
"total": 1726353408
}
}
}
```
The preflight command has been updated to run remote collectors. To run
a host collector remotely it must be specified in the spec as a
`remoteCollector`:
```
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: memory
spec:
remoteCollectors:
- memory:
collectorName: memory
analyzers:
- memory:
outcomes:
- fail:
when: "< 8Gi"
message: At least 8Gi of memory is required
- warn:
when: "< 32Gi"
message: At least 32Gi of memory is recommended
- pass:
message: The system has sufficient memory
```
Results for each node are analyzed separately, with the node name
appended to the title:
```
bin/preflight --interactive=false --collector-image=croomes/troubleshoot:latest examples/preflight/remote/memory.yaml --format=json
{memory running 0 1}
{memory completed 1 1}
{
"fail": [
{
"title": "Amount of Memory (kind-worker2)",
"message": "At least 8Gi of memory is required"
},
{
"title": "Amount of Memory (kind-worker)",
"message": "At least 8Gi of memory is required"
},
{
"title": "Amount of Memory (kind-control-plane)",
"message": "At least 8Gi of memory is required"
}
]
}
```
Also added a host collector to allow preflight checks of required kernel
modules, which is the main driver for this change.
107 lines
3.0 KiB
Go
107 lines
3.0 KiB
Go
package preflight
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"sort"
	"strings"

	analyze "github.com/replicatedhq/troubleshoot/pkg/analyze"
	troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
)
|
|
|
|
// Analyze runs the analyze phase of preflight checks
|
|
func (c ClusterCollectResult) Analyze() []*analyze.AnalyzeResult {
|
|
return doAnalyze(c.AllCollectedData, c.Spec.Spec.Analyzers, nil, "")
|
|
}
|
|
|
|
// Analyze runs the analysze phase of host preflight checks
|
|
func (c HostCollectResult) Analyze() []*analyze.AnalyzeResult {
|
|
return doAnalyze(c.AllCollectedData, nil, c.Spec.Spec.Analyzers, "")
|
|
}
|
|
|
|
// Analyze runs the analysze phase of host preflight checks.
|
|
//
|
|
// Runs the analysis for each node and aggregates the results.
|
|
func (c RemoteCollectResult) Analyze() []*analyze.AnalyzeResult {
|
|
var results []*analyze.AnalyzeResult
|
|
for nodeName, nodeResult := range c.AllCollectedData {
|
|
var strResult = make(map[string]string)
|
|
if err := json.Unmarshal(nodeResult, &strResult); err != nil {
|
|
analyzeResult := &analyze.AnalyzeResult{
|
|
IsFail: true,
|
|
Title: "Remote Result Parser Failed",
|
|
Message: err.Error(),
|
|
}
|
|
results = append(results, analyzeResult)
|
|
continue
|
|
}
|
|
|
|
var byteResult = make(map[string][]byte)
|
|
for k, v := range strResult {
|
|
byteResult[k] = []byte(v)
|
|
|
|
}
|
|
results = append(results, doAnalyze(byteResult, nil, c.Spec.Spec.Analyzers, nodeName)...)
|
|
}
|
|
return results
|
|
}
|
|
|
|
func doAnalyze(allCollectedData map[string][]byte, analyzers []*troubleshootv1beta2.Analyze, hostAnalyzers []*troubleshootv1beta2.HostAnalyze, nodeName string) []*analyze.AnalyzeResult {
|
|
getCollectedFileContents := func(fileName string) ([]byte, error) {
|
|
contents, ok := allCollectedData[fileName]
|
|
if !ok {
|
|
return nil, fmt.Errorf("file %s was not collected", fileName)
|
|
}
|
|
|
|
return contents, nil
|
|
}
|
|
getChildCollectedFileContents := func(prefix string) (map[string][]byte, error) {
|
|
matching := make(map[string][]byte)
|
|
for k, v := range allCollectedData {
|
|
if strings.HasPrefix(k, prefix) {
|
|
matching[k] = v
|
|
}
|
|
}
|
|
|
|
for k, v := range allCollectedData {
|
|
if ok, _ := filepath.Match(prefix, k); ok {
|
|
matching[k] = v
|
|
}
|
|
}
|
|
|
|
return matching, nil
|
|
}
|
|
|
|
analyzeResults := []*analyze.AnalyzeResult{}
|
|
for _, analyzer := range analyzers {
|
|
analyzeResult, err := analyze.Analyze(analyzer, getCollectedFileContents, getChildCollectedFileContents)
|
|
if err != nil {
|
|
analyzeResult = []*analyze.AnalyzeResult{
|
|
{
|
|
IsFail: true,
|
|
Title: "Analyzer Failed",
|
|
Message: err.Error(),
|
|
},
|
|
}
|
|
}
|
|
|
|
if analyzeResult != nil {
|
|
analyzeResults = append(analyzeResults, analyzeResult...)
|
|
}
|
|
}
|
|
|
|
for _, hostAnalyzer := range hostAnalyzers {
|
|
analyzeResult := analyze.HostAnalyze(hostAnalyzer, getCollectedFileContents, getChildCollectedFileContents)
|
|
analyzeResults = append(analyzeResults, analyzeResult...)
|
|
}
|
|
|
|
// Add the nodename to the result title if provided.
|
|
if nodeName != "" {
|
|
for _, result := range analyzeResults {
|
|
result.Title = fmt.Sprintf("%s (%s)", result.Title, nodeName)
|
|
}
|
|
}
|
|
return analyzeResults
|
|
}
|