mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-02-14 18:29:53 +00:00
* new collector dns * implement DNS collector * add dns service and endpoints check * add nil check on retrieve endpoints
This commit is contained in:
@@ -292,6 +292,15 @@ spec:
|
||||
required:
|
||||
- data
|
||||
type: object
|
||||
dns:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
timeout:
|
||||
type: string
|
||||
type: object
|
||||
exec:
|
||||
properties:
|
||||
args:
|
||||
|
||||
@@ -1972,6 +1972,15 @@ spec:
|
||||
required:
|
||||
- data
|
||||
type: object
|
||||
dns:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
timeout:
|
||||
type: string
|
||||
type: object
|
||||
exec:
|
||||
properties:
|
||||
args:
|
||||
|
||||
@@ -2003,6 +2003,15 @@ spec:
|
||||
required:
|
||||
- data
|
||||
type: object
|
||||
dns:
|
||||
properties:
|
||||
collectorName:
|
||||
type: string
|
||||
exclude:
|
||||
type: BoolString
|
||||
timeout:
|
||||
type: string
|
||||
type: object
|
||||
exec:
|
||||
properties:
|
||||
args:
|
||||
|
||||
@@ -293,6 +293,11 @@ type Sonobuoy struct {
|
||||
Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
|
||||
}
|
||||
|
||||
// DNS is the collector spec selected by the `dns` key of a Collect entry.
type DNS struct {
|
||||
// CollectorMeta is inlined, so its fields appear at the same level as
// `timeout` in the serialized spec.
CollectorMeta `json:",inline" yaml:",inline"`
|
||||
// Timeout is an optional string serialized as `timeout`.
// NOTE(review): presumably a Go duration such as "30s" — confirm against
// the collector implementation.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
|
||||
}
|
||||
|
||||
type Collect struct {
|
||||
ClusterInfo *ClusterInfo `json:"clusterInfo,omitempty" yaml:"clusterInfo,omitempty"`
|
||||
ClusterResources *ClusterResources `json:"clusterResources,omitempty" yaml:"clusterResources,omitempty"`
|
||||
@@ -322,6 +327,7 @@ type Collect struct {
|
||||
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
|
||||
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
|
||||
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
|
||||
DNS *DNS `json:"dns,omitempty" yaml:"dns,omitempty"`
|
||||
}
|
||||
|
||||
func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {
|
||||
|
||||
@@ -926,6 +926,11 @@ func (in *Collect) DeepCopyInto(out *Collect) {
|
||||
*out = new(NodeMetrics)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.DNS != nil {
|
||||
in, out := &in.DNS, &out.DNS
|
||||
*out = new(DNS)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Collect.
|
||||
@@ -1255,6 +1260,22 @@ func (in *CustomResourceDefinition) DeepCopy() *CustomResourceDefinition {
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *DNS) DeepCopyInto(out *DNS) {
|
||||
*out = *in
|
||||
in.CollectorMeta.DeepCopyInto(&out.CollectorMeta)
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DNS.
|
||||
func (in *DNS) DeepCopy() *DNS {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(DNS)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Data) DeepCopyInto(out *Data) {
|
||||
*out = *in
|
||||
|
||||
@@ -124,6 +124,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
|
||||
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
|
||||
case collector.NodeMetrics != nil:
|
||||
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
|
||||
case collector.DNS != nil:
|
||||
return &CollectDNS{collector.DNS, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
|
||||
default:
|
||||
return nil, false
|
||||
}
|
||||
@@ -215,6 +217,8 @@ func getCollectorName(c interface{}) string {
|
||||
collector = "sonobuoy"
|
||||
case *CollectNodeMetrics:
|
||||
collector = "node-metrics"
|
||||
case *CollectDNS:
|
||||
collector = "dns"
|
||||
default:
|
||||
collector = "<none>"
|
||||
}
|
||||
|
||||
273
pkg/collect/dns.go
Normal file
273
pkg/collect/dns.go
Normal file
@@ -0,0 +1,273 @@
|
||||
package collect
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/rest"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
// dnsUtilsImage is the container image used for the short-lived pod that
// runs the in-cluster DNS lookup commands.
dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"
|
||||
)
|
||||
|
||||
// CollectDNS is the collector that gathers DNS diagnostics from a cluster.
//
// The field order must not change: GetCollector builds this struct with an
// unkeyed composite literal.
type CollectDNS struct {
|
||||
// Collector is the user-supplied DNS spec (collector name, exclude flag, timeout).
Collector *troubleshootv1beta2.DNS
|
||||
// BundlePath is handed to SaveResult when the report is stored.
BundlePath string
|
||||
// Namespace is supplied by GetCollector; the methods in this file do not read it.
Namespace string
|
||||
// ClientConfig is supplied by GetCollector; the methods in this file do not read it.
ClientConfig *rest.Config
|
||||
// Client is the Kubernetes client used for every API call made by Collect.
Client kubernetes.Interface
|
||||
// Context is the parent context from which Collect derives its deadline.
Context context.Context
|
||||
// NOTE(review): embedded RBACErrors presumably carries permission errors
// detected before collection — confirm against its definition.
RBACErrors
|
||||
}
|
||||
|
||||
func (c *CollectDNS) Title() string {
|
||||
return getCollectorName(c)
|
||||
}
|
||||
|
||||
func (c *CollectDNS) IsExcluded() (bool, error) {
|
||||
return isExcluded(c.Collector.Exclude)
|
||||
}
|
||||
|
||||
func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
|
||||
|
||||
ctx, cancel := context.WithTimeout(c.Context, time.Duration(60*time.Second))
|
||||
defer cancel()
|
||||
|
||||
sb := strings.Builder{}
|
||||
|
||||
// get kubernetes Cluster IP
|
||||
clusterIP, err := getKubernetesClusterIP(c.Client, ctx)
|
||||
if err == nil {
|
||||
sb.WriteString(fmt.Sprintf("=== Kubernetes Cluster IP from API Server: %s\n", clusterIP))
|
||||
} else {
|
||||
sb.WriteString(fmt.Sprintf("=== Failed to detect Kubernetes Cluster IP: %v\n", err))
|
||||
}
|
||||
|
||||
// run a pod and perform DNS lookup
|
||||
podLog, err := troubleshootDNSFromPod(c.Client, ctx)
|
||||
if err == nil {
|
||||
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", dnsUtilsImage))
|
||||
sb.WriteString(podLog)
|
||||
} else {
|
||||
sb.WriteString(fmt.Sprintf("=== Failed to run commands from pod: %v\n", err))
|
||||
}
|
||||
|
||||
// is DNS pods running?
|
||||
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx)))
|
||||
|
||||
// is DNS service up?
|
||||
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx)))
|
||||
|
||||
// are DNS endpoints exposed?
|
||||
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx)))
|
||||
|
||||
// get DNS server config
|
||||
coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx)
|
||||
if err == nil {
|
||||
sb.WriteString("=== CoreDNS config: \n")
|
||||
sb.WriteString(coreDNSConfig)
|
||||
}
|
||||
kubeDNSConfig, err := getKubeDNSConfig(c.Client, ctx)
|
||||
if err == nil {
|
||||
sb.WriteString("=== KubeDNS config: \n")
|
||||
sb.WriteString(kubeDNSConfig)
|
||||
}
|
||||
|
||||
data := sb.String()
|
||||
output := NewResult()
|
||||
output.SaveResult(c.BundlePath, filepath.Join("dns", c.Collector.CollectorName), bytes.NewBuffer([]byte(data)))
|
||||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (string, error) {
|
||||
service, err := client.CoreV1().Services("default").Get(ctx, "kubernetes", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("Failed to detect Kubernetes Cluster IP: %v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
return service.Spec.ClusterIP, nil
|
||||
}
|
||||
|
||||
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
|
||||
namespace := "default"
|
||||
command := []string{"/bin/sh", "-c", `
|
||||
set -x
|
||||
cat /etc/resolv.conf
|
||||
nslookup -debug kubernetes
|
||||
exit 0
|
||||
`}
|
||||
|
||||
// TODO: image pull secret?
|
||||
podLabels := map[string]string{
|
||||
"troubleshoot-role": "dns-collector",
|
||||
}
|
||||
pod := &corev1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
GenerateName: "troubleshoot-dns-",
|
||||
Namespace: namespace,
|
||||
Labels: podLabels,
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
Containers: []corev1.Container{
|
||||
{
|
||||
Name: "troubleshoot-dns",
|
||||
Image: dnsUtilsImage,
|
||||
Command: command,
|
||||
},
|
||||
},
|
||||
RestartPolicy: corev1.RestartPolicyNever,
|
||||
},
|
||||
}
|
||||
|
||||
created, err := client.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
return "", errors.Wrap(err, "failed to run troubleshoot DNS pod")
|
||||
}
|
||||
klog.V(2).Infof("Pod with prefix %s has been created", created.GenerateName)
|
||||
|
||||
defer func() {
|
||||
if created == nil {
|
||||
return
|
||||
}
|
||||
err := client.CoreV1().Pods(namespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to delete troubleshoot DNS pod %s: %v", created.Name, err)
|
||||
}
|
||||
klog.V(2).Infof("Deleted pod %s", created.Name)
|
||||
}()
|
||||
|
||||
// wait for pod to be completed
|
||||
watcher, err := client.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
|
||||
LabelSelector: "troubleshoot-role=dns-collector",
|
||||
})
|
||||
if err != nil {
|
||||
return "", errors.Wrap(err, "failed to watch pod")
|
||||
}
|
||||
defer func() {
|
||||
if watcher != nil {
|
||||
watcher.Stop()
|
||||
}
|
||||
}()
|
||||
|
||||
for event := range watcher.ResultChan() {
|
||||
pod, ok := event.Object.(*corev1.Pod)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if pod.Status.Phase == corev1.PodSucceeded {
|
||||
break
|
||||
}
|
||||
if pod.Status.Phase == corev1.PodFailed {
|
||||
return "", errors.New("troubleshoot DNS pod failed")
|
||||
}
|
||||
}
|
||||
|
||||
// get pod logs
|
||||
podLogOpts := corev1.PodLogOptions{}
|
||||
req := client.CoreV1().Pods(namespace).GetLogs(created.Name, &podLogOpts)
|
||||
podLogs, err := req.Stream(ctx)
|
||||
if err != nil {
|
||||
return "", errors.Wrap(err, "failed to get pod logs")
|
||||
}
|
||||
defer podLogs.Close()
|
||||
|
||||
bytes, err := io.ReadAll(podLogs)
|
||||
if err != nil {
|
||||
return "", errors.Wrap(err, "failed to read troubleshoot DNS pod logs")
|
||||
}
|
||||
|
||||
return string(bytes), nil
|
||||
}
|
||||
|
||||
func getCoreDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
|
||||
configMap, err := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("Failed to detect CoreDNS config: %v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
return configMap.Data["Corefile"], nil
|
||||
}
|
||||
|
||||
func getKubeDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
|
||||
configMap, err := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("Failed to detect KubeDNS config: %v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
if configMap.Data == nil {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
dataBytes, err := json.Marshal(configMap.Data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(dataBytes), nil
|
||||
}
|
||||
|
||||
func getRunningKubeDNSPodNames(client kubernetes.Interface, ctx context.Context) string {
|
||||
pods, err := client.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{
|
||||
LabelSelector: "k8s-app=kube-dns",
|
||||
})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("failed to list kube-dns pods: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
var podNames []string
|
||||
for _, pod := range pods.Items {
|
||||
if pod.Status.Phase == corev1.PodRunning {
|
||||
podNames = append(podNames, pod.Name)
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(podNames, ", ")
|
||||
}
|
||||
|
||||
func getKubeDNSServiceClusterIP(client kubernetes.Interface, ctx context.Context) string {
|
||||
service, err := client.CoreV1().Services("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("failed to get kube-dns service: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
return service.Spec.ClusterIP
|
||||
}
|
||||
|
||||
func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) string {
|
||||
endpoints, err := client.CoreV1().Endpoints("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
klog.V(2).Infof("failed to get kube-dns endpoints: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
var endpointStrings []string
|
||||
for _, subset := range endpoints.Subsets {
|
||||
for _, address := range subset.Addresses {
|
||||
if len(subset.Ports) > 0 {
|
||||
endpointStrings = append(endpointStrings, fmt.Sprintf("%s:%d", address.IP, subset.Ports[0].Port))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(endpointStrings, ", ")
|
||||
}
|
||||
41
pkg/collect/dns_test.go
Normal file
41
pkg/collect/dns_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package collect
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
)
|
||||
|
||||
func TestGetKubernetesClusterIP(t *testing.T) {
|
||||
k8sSvcIp := "10.0.0.1"
|
||||
client := fake.NewSimpleClientset()
|
||||
service := &corev1.Service{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "kubernetes",
|
||||
Namespace: "default",
|
||||
},
|
||||
Spec: corev1.ServiceSpec{
|
||||
ClusterIP: k8sSvcIp,
|
||||
},
|
||||
}
|
||||
|
||||
// Add the service to the fake clientset
|
||||
_, err := client.CoreV1().Services("default").Create(context.TODO(), service, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
t.Fatalf("error injecting service into fake clientset: %v", err)
|
||||
}
|
||||
|
||||
// Call the function
|
||||
clusterIP, err := getKubernetesClusterIP(client, context.TODO())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting cluster IP: %v", err)
|
||||
}
|
||||
|
||||
// Check the result
|
||||
if clusterIP != k8sSvcIp {
|
||||
t.Errorf("expected %s, got %s", k8sSvcIp, clusterIP)
|
||||
}
|
||||
}
|
||||
@@ -390,6 +390,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"dns": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"collectorName": {
|
||||
"type": "string"
|
||||
},
|
||||
"exclude": {
|
||||
"oneOf": [{"type": "string"},{"type": "boolean"}]
|
||||
},
|
||||
"timeout": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"exec": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
@@ -2978,6 +2978,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"dns": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"collectorName": {
|
||||
"type": "string"
|
||||
},
|
||||
"exclude": {
|
||||
"oneOf": [{"type": "string"},{"type": "boolean"}]
|
||||
},
|
||||
"timeout": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"exec": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
@@ -3024,6 +3024,20 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"dns": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"collectorName": {
|
||||
"type": "string"
|
||||
},
|
||||
"exclude": {
|
||||
"oneOf": [{"type": "string"},{"type": "boolean"}]
|
||||
},
|
||||
"timeout": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"exec": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
Reference in New Issue
Block a user