Files
troubleshoot/cmd/collect/cli/run.go
Simon Croome 977fc438ea Remote host collectors (#392)
* Add collect command and remote host collectors

Adds the ability to run a host collector on a set of remote k8s nodes.
Target nodes can be filtered using the --selector flag, with the same
syntax as kubectl.  Existing flags for --collector-image,
--collector-pullpolicy and --request-timeout are used.  To run on a
specified node, --selector="kubernetes.io/hostname=kind-worker2" could
be used.

The collect command is used by the remote collector to output the
results using a "raw" format, which uses the filename as the key, and
the value the output as a escaped json string.  When run manually it
defaults to fully decoded json. The existing block devices,
ipv4interfaces and services host collectors don't decode properly - the
fix is to convert their slice output to a map (fix not included as
unsure what depends on the existing format).

The collect command is also useful for troubleshooting preflight issues.

Examples are included to show remote collector usage.

```
bin/collect --collector-image=croomes/troubleshoot:latest  examples/collect/remote/memory.yaml --namespace test
{
  "kind-control-plane": {
    "system/memory.json": {
      "total": 1304207360
    }
  },
  "kind-worker": {
    "system/memory.json": {
      "total": 1695780864
    }
  },
  "kind-worker2": {
    "system/memory.json": {
      "total": 1726353408
    }
  }
}
```

The preflight command has been updated to run remote collectors.  To run
a host collector remotely it must be specified in the spec as a
`remoteCollector`:

```
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
  name: memory
spec:
  remoteCollectors:
    - memory:
        collectorName: memory
  analyzers:
    - memory:
        outcomes:
          - fail:
              when: "< 8Gi"
              message: At least 8Gi of memory is required
          - warn:
              when: "< 32Gi"
              message: At least 32Gi of memory is recommended
          - pass:
              message: The system has as sufficient memory
```

Results for each node are analyzed separately, with the node name
appended to the title:

```
bin/preflight --interactive=false --collector-image=croomes/troubleshoot:latest examples/preflight/remote/memory.yaml --format=json
{memory running 0 1}
{memory completed 1 1}
{
  "fail": [
    {
      "title": "Amount of Memory (kind-worker2)",
      "message": "At least 8Gi of memory is required"
    },
    {
      "title": "Amount of Memory (kind-worker)",
      "message": "At least 8Gi of memory is required"
    },
    {
      "title": "Amount of Memory (kind-control-plane)",
      "message": "At least 8Gi of memory is required"
    }
  ]
}
```

Also added a host collector to allow preflight checks of required kernel
modules, which is the main driver for this change.
2021-10-06 09:03:53 -05:00

185 lines
5.2 KiB
Go

package cli
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"os/signal"
"strings"
"time"
"github.com/pkg/errors"
"github.com/replicatedhq/troubleshoot/cmd/util"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
troubleshootclientsetscheme "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/scheme"
"github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/replicatedhq/troubleshoot/pkg/docrewrite"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/replicatedhq/troubleshoot/pkg/specs"
"github.com/replicatedhq/troubleshoot/pkg/supportbundle"
"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/labels"
)
const (
defaultTimeout = 30 * time.Second
)
func runCollect(v *viper.Viper, arg string) error {
go func() {
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, os.Interrupt)
<-signalChan
os.Exit(0)
}()
var collectorContent []byte
var err error
if strings.HasPrefix(arg, "secret/") {
// format secret/namespace-name/secret-name
pathParts := strings.Split(arg, "/")
if len(pathParts) != 3 {
return errors.Errorf("path %s must have 3 components", arg)
}
spec, err := specs.LoadFromSecret(pathParts[1], pathParts[2], "collect-spec")
if err != nil {
return errors.Wrap(err, "failed to get spec from secret")
}
collectorContent = spec
} else if _, err = os.Stat(arg); err == nil {
b, err := ioutil.ReadFile(arg)
if err != nil {
return err
}
collectorContent = b
} else {
if !util.IsURL(arg) {
return fmt.Errorf("%s is not a URL and was not found (err %s)", arg, err)
}
req, err := http.NewRequest("GET", arg, nil)
if err != nil {
return err
}
req.Header.Set("User-Agent", "Replicated_Collect/v1beta2")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
collectorContent = body
}
collectorContent, err = docrewrite.ConvertToV1Beta2(collectorContent)
if err != nil {
return errors.Wrap(err, "failed to convert to v1beta2")
}
multidocs := strings.Split(string(collectorContent), "\n---\n")
troubleshootclientsetscheme.AddToScheme(scheme.Scheme)
decode := scheme.Codecs.UniversalDeserializer().Decode
additionalRedactors := &troubleshootv1beta2.Redactor{}
for idx, redactor := range v.GetStringSlice("redactors") {
redactorObj, err := supportbundle.GetRedactorFromURI(redactor)
if err != nil {
return errors.Wrapf(err, "failed to get redactor spec %s, #%d", redactor, idx)
}
if redactorObj != nil {
additionalRedactors.Spec.Redactors = append(additionalRedactors.Spec.Redactors, redactorObj.Spec.Redactors...)
}
}
for i, additionalDoc := range multidocs {
if i == 0 {
continue
}
additionalDoc, err := docrewrite.ConvertToV1Beta2([]byte(additionalDoc))
if err != nil {
return errors.Wrap(err, "failed to convert to v1beta2")
}
obj, _, err := decode(additionalDoc, nil, nil)
if err != nil {
return errors.Wrapf(err, "failed to parse additional doc %d", i)
}
multidocRedactors, ok := obj.(*troubleshootv1beta2.Redactor)
if !ok {
continue
}
additionalRedactors.Spec.Redactors = append(additionalRedactors.Spec.Redactors, multidocRedactors.Spec.Redactors...)
}
// make sure we don't block any senders
progressCh := make(chan interface{})
defer close(progressCh)
go func() {
for range progressCh {
}
}()
restConfig, err := k8sutil.GetRESTConfig()
if err != nil {
return errors.Wrap(err, "failed to convert kube flags to rest config")
}
labelSelector, err := labels.Parse(v.GetString("selector"))
if err != nil {
return errors.Wrap(err, "unable to parse selector")
}
namespace := v.GetString("namespace")
if namespace == "" {
namespace = "default"
}
timeout := v.GetDuration("request-timeout")
if timeout == 0 {
timeout = defaultTimeout
}
createOpts := collect.CollectorRunOpts{
CollectWithoutPermissions: v.GetBool("collect-without-permissions"),
KubernetesRestConfig: restConfig,
Image: v.GetString("collector-image"),
PullPolicy: v.GetString("collector-pullpolicy"),
LabelSelector: labelSelector.String(),
Namespace: namespace,
Timeout: timeout,
ProgressChan: progressCh,
}
// we only support HostCollector or RemoteCollector kinds.
hostCollector, err := collect.ParseHostCollectorFromDoc([]byte(multidocs[0]))
if err == nil {
results, err := collect.CollectHost(hostCollector, additionalRedactors, createOpts)
if err != nil {
return errors.Wrap(err, "failed to collect from host")
}
return showHostStdoutResults(v.GetString("format"), hostCollector.Name, results)
}
remoteCollector, err := collect.ParseRemoteCollectorFromDoc([]byte(multidocs[0]))
if err == nil {
results, err := collect.CollectRemote(remoteCollector, additionalRedactors, createOpts)
if err != nil {
return errors.Wrap(err, "failed to collect from remote host(s)")
}
return showRemoteStdoutResults(v.GetString("format"), remoteCollector.Name, results)
}
return errors.New("failed to parse hostCollector or remoteCollector")
}