Running collectors without the CRD

This commit is contained in:
Marc Campbell
2019-07-18 02:01:30 +00:00
parent e3b1a9a1d5
commit df4edcb80d
25 changed files with 845 additions and 135 deletions

View File

@@ -5,7 +5,6 @@ import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
@@ -16,7 +15,7 @@ import (
analyzerunner "github.com/replicatedhq/troubleshoot/pkg/analyze"
troubleshootv1beta1 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta1"
preflightrunner "github.com/replicatedhq/troubleshoot/pkg/preflight"
collectrunner "github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
@@ -71,7 +70,7 @@ func runPreflightsNoCRD(v *viper.Viper, arg string) error {
getCollectedFileContents := func(fileName string) ([]byte, error) {
contents, ok := allCollectedData[fileName]
if !ok {
return nil, errors.New("not found")
return nil, fmt.Errorf("file %s was not collected", fileName)
}
return contents, nil
@@ -81,7 +80,8 @@ func runPreflightsNoCRD(v *viper.Viper, arg string) error {
for _, analyzer := range preflight.Spec.Analyzers {
analyzeResult, err := analyzerunner.Analyze(analyzer, getCollectedFileContents)
if err != nil {
return err
fmt.Printf("an analyzer failed to run: %v\n", err)
continue
}
analyzeResults = append(analyzeResults, analyzeResult)
@@ -215,7 +215,7 @@ func runCollectors(v *viper.Viper, preflight troubleshootv1beta1.Preflight) (map
s := runtime.NewScheme()
s.AddKnownTypes(schema.GroupVersion{Group: "", Version: "v1"}, &corev1.ConfigMap{})
for _, collector := range desiredCollectors {
_, pod, err := preflightrunner.CreateCollector(client, s, &owner, preflight.Name, v.GetString("namespace"), collector, v.GetString("image"), v.GetString("pullpolicy"))
_, pod, err := collectrunner.CreateCollector(client, s, &owner, preflight.Name, v.GetString("namespace"), "preflight", collector, v.GetString("image"), v.GetString("pullpolicy"))
if err != nil {
return nil, err
}

View File

@@ -74,6 +74,7 @@ func receiveSupportBundle(collectorJobNamespace string, collectorJobName string)
for filename, maybeContents := range files {
fileDir, fileName := filepath.Split(filename)
outPath := filepath.Join(bundlePath, fileDir)
if err := os.MkdirAll(outPath, 0777); err != nil {
return err
}

View File

@@ -3,7 +3,6 @@ package cli
import (
"errors"
"fmt"
"os"
"path/filepath"
troubleshootv1beta1 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta1"
@@ -79,10 +78,3 @@ func Retrieve() *cobra.Command {
return cmd
}
// homeDir returns the current user's home directory as reported by the
// environment: $HOME when it is set to a non-empty value, otherwise
// %USERPROFILE% (which may itself be empty).
func homeDir() string {
	home := os.Getenv("HOME")
	if home == "" {
		home = os.Getenv("USERPROFILE") // windows
	}
	return home
}

View File

@@ -1,17 +1,11 @@
package cli
import (
"errors"
"fmt"
"path/filepath"
"time"
troubleshootv1beta1 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta1"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/spf13/cobra"
"github.com/spf13/viper"
kuberneteserrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func Run() *cobra.Command {
@@ -34,91 +28,11 @@ troubleshoot run --collectors application --wait
RunE: func(cmd *cobra.Command, args []string) error {
v := viper.GetViper()
troubleshootClient, err := createTroubleshootK8sClient()
if err != nil {
return err
if len(args) == 0 {
return runTroubleshootCRD(v)
}
collectorName := v.GetString("collectors")
if collectorName == "" {
collectors, err := troubleshootClient.Collectors(v.GetString("namespace")).List(metav1.ListOptions{})
if err != nil {
return err
}
if len(collectors.Items) == 1 {
collectorName = collectors.Items[0].Name
}
}
if collectorName == "" {
return errors.New("unknown collectors, try using the --collectors flags")
}
// generate a unique name
now := time.Now()
suffix := fmt.Sprintf("%d", now.Unix())
collectorJobName := fmt.Sprintf("%s-job-%s", collectorName, suffix[len(suffix)-4:])
collectorJob := troubleshootv1beta1.CollectorJob{
ObjectMeta: metav1.ObjectMeta{
Name: collectorJobName,
Namespace: v.GetString("namespace"),
},
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Kind: "collectorjob.troubleshoot.replicated.com",
},
Spec: troubleshootv1beta1.CollectorJobSpec{
Collector: troubleshootv1beta1.CollectorRef{
Name: collectorName,
Namespace: v.GetString("namespace"),
},
Image: v.GetString("image"),
ImagePullPolicy: v.GetString("pullpolicy"),
},
}
if _, err := troubleshootClient.CollectorJobs(v.GetString("namespace")).Create(&collectorJob); err != nil {
return err
}
// Poll the status of the Custom Resource for it to include a callback
var found *troubleshootv1beta1.CollectorJob
start := time.Now()
for {
current, err := troubleshootClient.CollectorJobs(v.GetString("namespace")).Get(collectorJobName, metav1.GetOptions{})
if err != nil && kuberneteserrors.IsNotFound(err) {
continue
} else if err != nil {
return err
}
if current.Status.IsServerReady {
found = current
break
}
if time.Now().Sub(start) > time.Duration(time.Second*10) {
return errors.New("collectorjob failed to start")
}
time.Sleep(time.Millisecond * 200)
}
// Connect to the callback
stopChan, err := k8sutil.PortForward(v.GetString("kubecontext"), 8000, 8000, found.Status.ServerPodNamespace, found.Status.ServerPodName)
if err != nil {
return err
}
if err := receiveSupportBundle(found.Namespace, found.Name); err != nil {
return err
}
// Write
close(stopChan)
return nil
return runTroubleshootNoCRD(v, args[0])
},
}
@@ -134,3 +48,16 @@ troubleshoot run --collectors application --wait
return cmd
}
// ensureCollectorInList appends collector to list unless list already holds a
// collector of the same kind. Only the ClusterResources and ClusterInfo kinds
// are de-duplicated; a collector with neither field set is always appended.
// The (possibly extended) list is returned.
func ensureCollectorInList(list []*troubleshootv1beta1.Collect, collector troubleshootv1beta1.Collect) []*troubleshootv1beta1.Collect {
	wantsResources := collector.ClusterResources != nil
	wantsInfo := collector.ClusterInfo != nil

	for i := range list {
		existing := list[i]
		switch {
		case wantsResources && existing.ClusterResources != nil,
			wantsInfo && existing.ClusterInfo != nil:
			// A collector of this kind is already present.
			return list
		}
	}

	return append(list, &collector)
}

View File

@@ -0,0 +1,101 @@
package cli
import (
"errors"
"fmt"
"time"
troubleshootv1beta1 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta1"
"github.com/replicatedhq/troubleshoot/pkg/k8sutil"
"github.com/spf13/viper"
kuberneteserrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// runTroubleshootCRD runs a collector through the CollectorJob custom
// resource.
//
// The collector name is read from the "collectors" setting; when it is empty
// and exactly one Collector exists in the configured namespace, that one is
// used. A CollectorJob is then created, polled until its status reports that
// the server is ready (giving up after 10 seconds), and the support bundle is
// received through a port-forward on port 8000 to the server pod.
//
// It returns an error when no collector can be determined, when any API call
// fails, when the job does not become ready in time, or when receiving the
// bundle fails.
func runTroubleshootCRD(v *viper.Viper) error {
	troubleshootClient, err := createTroubleshootK8sClient()
	if err != nil {
		return err
	}

	namespace := v.GetString("namespace")

	collectorName := v.GetString("collectors")
	if collectorName == "" {
		collectors, err := troubleshootClient.Collectors(namespace).List(metav1.ListOptions{})
		if err != nil {
			return err
		}

		if len(collectors.Items) == 1 {
			collectorName = collectors.Items[0].Name
		}
	}

	if collectorName == "" {
		return errors.New("unknown collectors, try using the --collectors flags")
	}

	// generate a unique name: the last four decimal digits of the current
	// unix time. Using the remainder instead of slicing the formatted string
	// yields the same digits without any risk of slicing out of range.
	collectorJobName := fmt.Sprintf("%s-job-%04d", collectorName, time.Now().Unix()%10000)

	collectorJob := troubleshootv1beta1.CollectorJob{
		ObjectMeta: metav1.ObjectMeta{
			Name:      collectorJobName,
			Namespace: namespace,
		},
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "collectorjob.troubleshoot.replicated.com",
		},
		Spec: troubleshootv1beta1.CollectorJobSpec{
			Collector: troubleshootv1beta1.CollectorRef{
				Name:      collectorName,
				Namespace: namespace,
			},
			Image:           v.GetString("image"),
			ImagePullPolicy: v.GetString("pullpolicy"),
		},
	}
	if _, err := troubleshootClient.CollectorJobs(namespace).Create(&collectorJob); err != nil {
		return err
	}

	// Poll the status of the Custom Resource for it to include a callback
	var found *troubleshootv1beta1.CollectorJob
	start := time.Now()
	for {
		current, err := troubleshootClient.CollectorJobs(namespace).Get(collectorJobName, metav1.GetOptions{})
		if err != nil && !kuberneteserrors.IsNotFound(err) {
			return err
		}

		if err == nil && current.Status.IsServerReady {
			found = current
			break
		}

		// A not-found job is treated like a not-yet-ready one: it is still
		// subject to the deadline and the back-off below, so the loop can
		// neither spin without sleeping nor run forever.
		if time.Since(start) > 10*time.Second {
			return errors.New("collectorjob failed to start")
		}

		time.Sleep(200 * time.Millisecond)
	}

	// Connect to the callback
	stopChan, err := k8sutil.PortForward(v.GetString("kubecontext"), 8000, 8000, found.Status.ServerPodNamespace, found.Status.ServerPodName)
	if err != nil {
		return err
	}
	// Stop the port-forward on every exit path, including a failed download.
	defer close(stopChan)

	return receiveSupportBundle(found.Namespace, found.Name)
}

View File

@@ -0,0 +1,291 @@
package cli
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"time"
"github.com/mholt/archiver"
troubleshootv1beta1 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta1"
collectrunner "github.com/replicatedhq/troubleshoot/pkg/collect"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
)
// runTroubleshootNoCRD loads a Collector spec from arg, runs its collectors
// and prints the path of the resulting archive to stdout.
//
// arg is fetched with an HTTP GET when isURL reports it as a URL, and read as
// a local file otherwise. An error is returned when the file does not exist
// or cannot be read, when the download fails or answers with a non-2xx
// status, when the content is not valid YAML for a Collector, or when running
// the collectors fails.
func runTroubleshootNoCRD(v *viper.Viper, arg string) error {
	var collectorContent []byte

	if isURL(arg) {
		resp, err := http.Get(arg)
		if err != nil {
			return err
		}
		defer resp.Body.Close()

		// Without this check an error page (404, 500, ...) would be handed
		// to the YAML parser as if it were the collector spec.
		if resp.StatusCode < 200 || resp.StatusCode > 299 {
			return fmt.Errorf("unable to download %s: unexpected status %s", arg, resp.Status)
		}

		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return err
		}
		collectorContent = body
	} else {
		if _, err := os.Stat(arg); os.IsNotExist(err) {
			return fmt.Errorf("%s was not found", arg)
		}

		b, err := ioutil.ReadFile(arg)
		if err != nil {
			return err
		}
		collectorContent = b
	}

	collector := troubleshootv1beta1.Collector{}
	if err := yaml.Unmarshal(collectorContent, &collector); err != nil {
		return fmt.Errorf("unable to parse %s collectors: %v", arg, err)
	}

	archivePath, err := runCollectors(v, collector)
	if err != nil {
		return err
	}

	fmt.Printf("%s\n", archivePath)

	return nil
}
// runCollectors runs every collector defined in collector (plus the
// ClusterInfo and ClusterResources collectors when they are not already
// listed) as pods, gathers their output and packs it into
// "support-bundle.tar.gz" in the current working directory.
//
// A ConfigMap named "troubleshoot-<collector name>-owner" is created first
// and passed to CreateCollector as the owner; it is deleted again when the
// function returns. A pod informer reads the logs of each matching pod once
// it reaches the Succeeded phase, saves the decoded output below a temporary
// directory and deletes the pod.
//
// It returns the archive path, or an error when the cluster clients cannot
// be built, a Kubernetes object cannot be created, the archive cannot be
// written, or not every collector has reported within 30 seconds of the pods
// being created.
func runCollectors(v *viper.Viper, collector troubleshootv1beta1.Collector) (string, error) {
	cfg, err := config.GetConfig()
	if err != nil {
		return "", err
	}

	k8sClient, err := client.New(cfg, client.Options{})
	if err != nil {
		return "", err
	}
	clientset, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return "", err
	}
	restClient := clientset.CoreV1().RESTClient()

	// deploy an object that "owns" everything to aid in cleanup
	owner := corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("troubleshoot-%s-owner", collector.Name),
			Namespace: v.GetString("namespace"),
		},
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "ConfigMap",
		},
		Data: make(map[string]string),
	}
	if err := k8sClient.Create(context.Background(), &owner); err != nil {
		return "", err
	}
	defer func() {
		if err := k8sClient.Delete(context.Background(), &owner); err != nil {
			fmt.Println("failed to clean up preflight.")
		}
	}()

	// deploy all collectors
	desiredCollectors := make([]*troubleshootv1beta1.Collect, 0, len(collector.Spec)+2)
	for _, definedCollector := range collector.Spec {
		desiredCollectors = append(desiredCollectors, definedCollector)
	}
	desiredCollectors = ensureCollectorInList(desiredCollectors, troubleshootv1beta1.Collect{ClusterInfo: &troubleshootv1beta1.ClusterInfo{}})
	desiredCollectors = ensureCollectorInList(desiredCollectors, troubleshootv1beta1.Collect{ClusterResources: &troubleshootv1beta1.ClusterResources{}})

	bundlePath, err := ioutil.TempDir("", "troubleshoot")
	if err != nil {
		return "", err
	}
	// defer os.RemoveAll(bundlePath)
	// NOTE(review): cleanup of the temporary directory is disabled above, so
	// it is left behind after every run — confirm whether that is intended.

	// Cancelling ctx stops the informer and releases a handler that is
	// waiting to report a result, on every return path.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// collected hands each finished collector's output directory from the
	// informer goroutine to this one, so no slice is shared between them.
	collected := make(chan string)

	resyncPeriod := time.Second

	// The watch is created with an empty namespace (presumably all
	// namespaces); the handler narrows events down by label.
	watchList := cache.NewListWatchFromClient(restClient, "pods", "", fields.Everything())
	_, controller := cache.NewInformer(watchList, &corev1.Pod{}, resyncPeriod,
		cache.ResourceEventHandlerFuncs{
			UpdateFunc: func(oldObj interface{}, newObj interface{}) {
				newPod, ok := newObj.(*corev1.Pod)
				if !ok {
					return
				}
				oldPod, ok := oldObj.(*corev1.Pod)
				if !ok {
					return
				}

				// Only pods labelled for this troubleshoot run are relevant.
				labels := newPod.Labels
				troubleshootRole, ok := labels["troubleshoot-role"]
				if !ok || troubleshootRole != "troubleshoot" {
					return
				}
				collectorName, ok := labels["troubleshoot"]
				if !ok || collectorName != collector.Name {
					return
				}

				// Act once, on the transition into the Succeeded phase;
				// periodic resyncs deliver updates with an unchanged phase.
				if oldPod.Status.Phase == newPod.Status.Phase {
					return
				}
				if newPod.Status.Phase != corev1.PodSucceeded {
					return
				}

				podLogOpts := corev1.PodLogOptions{}
				req := clientset.CoreV1().Pods(newPod.Namespace).GetLogs(newPod.Name, &podLogOpts)
				podLogs, err := req.Stream()
				if err != nil {
					fmt.Println("get stream")
					return
				}
				defer podLogs.Close()

				buf := new(bytes.Buffer)
				if _, err := io.Copy(buf, podLogs); err != nil {
					fmt.Println("copy logs")
					return
				}

				collectorDir, err := parseAndSaveCollectorOutput(buf.String(), bundlePath)
				if err != nil {
					fmt.Printf("parse collected data: %v\n", err)
					return
				}

				if err := k8sClient.Delete(context.Background(), newPod); err != nil {
					fmt.Println("delete pod")
				}

				// Report the result, unless the caller has already given up.
				select {
				case collected <- collectorDir:
				case <-ctx.Done():
				}
			},
		})
	go controller.Run(ctx.Done())

	s := runtime.NewScheme()
	s.AddKnownTypes(schema.GroupVersion{Group: "", Version: "v1"}, &corev1.ConfigMap{})
	for _, collect := range desiredCollectors {
		_, _, err := collectrunner.CreateCollector(k8sClient, s, &owner, collector.Name, v.GetString("namespace"), "troubleshoot", collect, v.GetString("image"), v.GetString("pullpolicy"))
		if err != nil {
			return "", err
		}
	}

	// Wait until every created pod has reported, or fail after 30 seconds.
	collectorDirs := make([]string, 0, len(desiredCollectors))
	timeout := time.After(30 * time.Second)
	for len(collectorDirs) < len(desiredCollectors) {
		select {
		case dir := <-collected:
			collectorDirs = append(collectorDirs, dir)
		case <-timeout:
			// Returned as a real error so the caller does not treat a
			// timeout as a successful run with an empty archive path.
			return "", fmt.Errorf("timeout running troubleshoot")
		}
	}

	// All results are in; stop watching before building the archive.
	cancel()

	tarGz := archiver.TarGz{
		Tar: &archiver.Tar{
			ImplicitTopLevelFolder: false,
		},
	}

	if err := tarGz.Archive(collectorDirs, "support-bundle.tar.gz"); err != nil {
		return "", err
	}

	return "support-bundle.tar.gz", nil
}
// parseAndSaveCollectorOutput decodes the JSON document a collector wrote and
// stores the files it describes below bundlePath.
//
// output must be a JSON object keyed by file name. A string value is the
// base64-encoded content of that file. An object value is a set of nested
// files, written below a directory named after the key, each with a
// base64-encoded string as content. Values of any other JSON type are
// ignored.
//
// It returns the directory of the last entry that was processed (map
// iteration order is random, so with several directories the choice is
// arbitrary; it is "" for an empty document), or an error when the JSON or
// base64 is invalid, a nested value is not a string, or the file system
// operations fail.
func parseAndSaveCollectorOutput(output string, bundlePath string) (string, error) {
	dir := ""

	input := make(map[string]interface{})
	if err := json.Unmarshal([]byte(output), &input); err != nil {
		return "", err
	}

	// NOTE(review): file names come straight from the collector output and
	// are joined onto bundlePath unchecked; a name containing ".." could end
	// up outside bundlePath — confirm the collector output is trusted.
	for filename, maybeContents := range input {
		fileDir, fileName := filepath.Split(filename)
		outPath := filepath.Join(bundlePath, fileDir)
		dir = outPath

		if err := os.MkdirAll(outPath, 0777); err != nil {
			return "", err
		}

		switch contents := maybeContents.(type) {
		case string:
			decoded, err := base64.StdEncoding.DecodeString(contents)
			if err != nil {
				return "", err
			}

			if err := writeFile(filepath.Join(outPath, fileName), decoded); err != nil {
				return "", err
			}

		case map[string]interface{}:
			for k, v := range contents {
				// Checked assertion: unexpected JSON (number, list, nested
				// object) is reported as an error instead of a panic.
				encoded, ok := v.(string)
				if !ok {
					return "", fmt.Errorf("unexpected content type %T for %s in %s", v, k, filename)
				}

				nestedPath := filepath.Join(outPath, fileName, k)
				if err := os.MkdirAll(filepath.Dir(nestedPath), 0777); err != nil {
					return "", err
				}

				decoded, err := base64.StdEncoding.DecodeString(encoded)
				if err != nil {
					return "", err
				}
				if err := writeFile(nestedPath, decoded); err != nil {
					return "", err
				}
			}
		}
	}

	return dir, nil
}

View File

@@ -1,11 +1,30 @@
package cli
import (
"net/url"
"os"
troubleshootclientv1beta1 "github.com/replicatedhq/troubleshoot/pkg/client/troubleshootclientset/typed/troubleshoot/v1beta1"
"github.com/spf13/viper"
"k8s.io/client-go/tools/clientcmd"
)
// homeDir returns the current user's home directory as reported by the
// environment: $HOME when it is set to a non-empty value, otherwise
// %USERPROFILE% (which may itself be empty).
func homeDir() string {
	home := os.Getenv("HOME")
	if home == "" {
		home = os.Getenv("USERPROFILE") // windows
	}
	return home
}
// isURL reports whether str is an absolute URL, i.e. it parses as a request
// URI and carries both a scheme and a host.
//
// Parsing alone is not enough: url.ParseRequestURI also accepts rooted paths
// such as "/tmp/spec.yaml", which would make absolute local file paths look
// like URLs. Requiring a scheme and a host rejects those.
func isURL(str string) bool {
	parsed, err := url.ParseRequestURI(str)
	if err != nil {
		return false
	}

	return parsed.Scheme != "" && parsed.Host != ""
}
func createTroubleshootK8sClient() (*troubleshootclientv1beta1.TroubleshootV1beta1Client, error) {
v := viper.GetViper()