feat: [sc-103754] Be able to detect search domain misconfiguration #1391 (#1534)

* new collector dns

* implement DNS collector

* add dns service and endpoints check

* add nil check on retrieve endpoints
This commit is contained in:
Gerard Nguyen
2024-05-01 07:04:20 +10:00
committed by GitHub
parent cb5db1733a
commit 6b368f2221
11 changed files with 414 additions and 0 deletions

View File

@@ -292,6 +292,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:

View File

@@ -1972,6 +1972,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:

View File

@@ -2003,6 +2003,15 @@ spec:
required:
- data
type: object
dns:
properties:
collectorName:
type: string
exclude:
type: BoolString
timeout:
type: string
type: object
exec:
properties:
args:

View File

@@ -293,6 +293,11 @@ type Sonobuoy struct {
Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
}
// DNS is the spec for the DNS collector, referenced from Collect under the
// "dns" key.
type DNS struct {
// CollectorMeta is inlined, so its fields (the collector reads CollectorName
// and Exclude from it) sit at the same level as timeout in the serialized spec.
CollectorMeta `json:",inline" yaml:",inline"`
// Timeout is an optional string serialized as "timeout".
// NOTE(review): presumably a Go duration such as "30s" — confirm against the
// collector implementation.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
}
type Collect struct {
ClusterInfo *ClusterInfo `json:"clusterInfo,omitempty" yaml:"clusterInfo,omitempty"`
ClusterResources *ClusterResources `json:"clusterResources,omitempty" yaml:"clusterResources,omitempty"`
@@ -322,6 +327,7 @@ type Collect struct {
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
DNS *DNS `json:"dns,omitempty" yaml:"dns,omitempty"`
}
func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {

View File

@@ -926,6 +926,11 @@ func (in *Collect) DeepCopyInto(out *Collect) {
*out = new(NodeMetrics)
(*in).DeepCopyInto(*out)
}
if in.DNS != nil {
in, out := &in.DNS, &out.DNS
*out = new(DNS)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Collect.
@@ -1255,6 +1260,22 @@ func (in *CustomResourceDefinition) DeepCopy() *CustomResourceDefinition {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DNS) DeepCopyInto(out *DNS) {
// Copy every field by value first, then let the embedded CollectorMeta
// overwrite its portion of out with its own deep copy.
*out = *in
in.CollectorMeta.DeepCopyInto(&out.CollectorMeta)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DNS.
// A nil receiver yields nil; otherwise a freshly allocated DNS is filled via DeepCopyInto.
func (in *DNS) DeepCopy() *DNS {
if in == nil {
return nil
}
out := new(DNS)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Data) DeepCopyInto(out *Data) {
*out = *in

View File

@@ -124,6 +124,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
case collector.NodeMetrics != nil:
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
case collector.DNS != nil:
return &CollectDNS{collector.DNS, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
default:
return nil, false
}
@@ -215,6 +217,8 @@ func getCollectorName(c interface{}) string {
collector = "sonobuoy"
case *CollectNodeMetrics:
collector = "node-metrics"
case *CollectDNS:
collector = "dns"
default:
collector = "<none>"
}

273
pkg/collect/dns.go Normal file
View File

@@ -0,0 +1,273 @@
package collect
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"path/filepath"
"strings"
"time"
"github.com/pkg/errors"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)
// dnsUtilsImage is the container image used by the pod that runs the
// in-cluster DNS lookup commands.
const dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"
// CollectDNS is the collector that gathers DNS diagnostics from a cluster.
//
// Field order matters: GetCollector constructs this struct with an unkeyed
// composite literal, so fields must not be reordered without updating it.
type CollectDNS struct {
// Collector is the DNS collector spec (name, exclude flag, timeout).
Collector *troubleshootv1beta2.DNS
// BundlePath is passed to SaveResult as the bundle location for the report.
BundlePath string
// Namespace is supplied by GetCollector; the functions in this file use
// hard-coded namespaces ("default", "kube-system") and do not read it.
Namespace string
// ClientConfig is supplied by GetCollector; not read by the code in this file.
ClientConfig *rest.Config
// Client is the Kubernetes client used for every API call made by the collector.
Client kubernetes.Interface
// Context is the parent context from which Collect derives its deadline.
Context context.Context
RBACErrors
}
// Title returns the name getCollectorName resolves for this collector type.
func (c *CollectDNS) Title() string {
	title := getCollectorName(c)
	return title
}
// IsExcluded reports whether the collector spec's Exclude setting disables
// this collector, as evaluated by isExcluded.
func (c *CollectDNS) IsExcluded() (bool, error) {
	excluded, err := isExcluded(c.Collector.Exclude)
	return excluded, err
}
// Collect gathers DNS diagnostics from the cluster and stores them as a single
// plain-text report in the bundle under dns/<collectorName>.
//
// The report contains, in order: the ClusterIP of the "kubernetes" service,
// the output of a lookup pod (resolv.conf and nslookup), the running kube-dns
// pods, the kube-dns service ClusterIP, the kube-dns endpoints, and the
// CoreDNS / KubeDNS config maps when they can be read. A failing probe is
// noted in the report (or omitted, for the config maps) instead of aborting
// the whole collection.
//
// The overall deadline is taken from the spec's Timeout field when it holds a
// positive Go duration (e.g. "30s"); an empty value means 60 seconds, and an
// invalid value is noted in the report and also falls back to 60 seconds.
//
// progressChan is accepted for interface compatibility and is not used.
// An error is returned only when the report cannot be saved.
func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
	const defaultTimeout = 60 * time.Second

	var sb strings.Builder

	// Honor the user-supplied timeout; a bad value must not prevent collection.
	timeout := defaultTimeout
	if raw := c.Collector.Timeout; raw != "" {
		if parsed, err := time.ParseDuration(raw); err == nil && parsed > 0 {
			timeout = parsed
		} else {
			fmt.Fprintf(&sb, "=== Ignoring invalid timeout %q, using default %s\n", raw, defaultTimeout)
		}
	}

	ctx, cancel := context.WithTimeout(c.Context, timeout)
	defer cancel()

	// get kubernetes Cluster IP
	if clusterIP, err := getKubernetesClusterIP(c.Client, ctx); err == nil {
		fmt.Fprintf(&sb, "=== Kubernetes Cluster IP from API Server: %s\n", clusterIP)
	} else {
		fmt.Fprintf(&sb, "=== Failed to detect Kubernetes Cluster IP: %v\n", err)
	}

	// run a pod and perform DNS lookup
	if podLog, err := troubleshootDNSFromPod(c.Client, ctx); err == nil {
		fmt.Fprintf(&sb, "=== Test DNS resolution in pod %s: \n", dnsUtilsImage)
		sb.WriteString(podLog)
	} else {
		fmt.Fprintf(&sb, "=== Failed to run commands from pod: %v\n", err)
	}

	// is DNS pods running?
	fmt.Fprintf(&sb, "=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx))

	// is DNS service up?
	fmt.Fprintf(&sb, "=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx))

	// are DNS endpoints exposed?
	fmt.Fprintf(&sb, "=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx))

	// get DNS server config; a cluster normally has only one of these two
	// config maps, so a failed read is simply skipped.
	if coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx); err == nil {
		sb.WriteString("=== CoreDNS config: \n")
		sb.WriteString(coreDNSConfig)
	}
	if kubeDNSConfig, err := getKubeDNSConfig(c.Client, ctx); err == nil {
		sb.WriteString("=== KubeDNS config: \n")
		sb.WriteString(kubeDNSConfig)
	}

	output := NewResult()
	reportPath := filepath.Join("dns", c.Collector.CollectorName)
	if err := output.SaveResult(c.BundlePath, reportPath, bytes.NewBufferString(sb.String())); err != nil {
		return nil, fmt.Errorf("failed to save DNS collector result: %w", err)
	}
	return output, nil
}
// getKubernetesClusterIP returns the ClusterIP of the "kubernetes" service in
// the "default" namespace. On failure the error is logged at verbosity 2 and
// returned together with an empty string.
func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (string, error) {
	svc, err := client.CoreV1().Services("default").Get(ctx, "kubernetes", metav1.GetOptions{})
	if err == nil {
		return svc.Spec.ClusterIP, nil
	}

	klog.V(2).Infof("Failed to detect Kubernetes Cluster IP: %v", err)
	return "", err
}
// troubleshootDNSFromPod runs a short-lived pod in the "default" namespace that
// prints /etc/resolv.conf and performs a debug nslookup of "kubernetes", waits
// for the pod to finish, and returns the pod's log output.
//
// The pod is deleted before returning, even when ctx has already expired.
// An error is returned when the pod cannot be created or watched, when it
// ends in the Failed phase, when the wait ends before the pod succeeds (for
// example because ctx timed out), or when the logs cannot be read.
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
	namespace := "default"
	command := []string{"/bin/sh", "-c", `
set -x
cat /etc/resolv.conf
nslookup -debug kubernetes
exit 0
`}

	// TODO: image pull secret?
	podLabels := map[string]string{
		"troubleshoot-role": "dns-collector",
	}
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "troubleshoot-dns-",
			Namespace:    namespace,
			Labels:       podLabels,
		},
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{
					Name:    "troubleshoot-dns",
					Image:   dnsUtilsImage,
					Command: command,
				},
			},
			RestartPolicy: corev1.RestartPolicyNever,
		},
	}

	created, err := client.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	if err != nil {
		return "", errors.Wrap(err, "failed to run troubleshoot DNS pod")
	}
	klog.V(2).Infof("Pod with prefix %s has been created", created.GenerateName)

	defer func() {
		// ctx may already be cancelled or past its deadline when we get here
		// (that is the timeout path), which would make the delete fail and
		// leak the pod. Clean up with an independent, bounded context.
		deleteCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()

		if err := client.CoreV1().Pods(namespace).Delete(deleteCtx, created.Name, metav1.DeleteOptions{}); err != nil {
			klog.Errorf("Failed to delete troubleshoot DNS pod %s: %v", created.Name, err)
			return
		}
		klog.V(2).Infof("Deleted pod %s", created.Name)
	}()

	// wait for pod to be completed; restrict the watch to the pod we created
	// so that leftover or concurrent collector pods cannot satisfy the wait.
	watcher, err := client.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
		LabelSelector: "troubleshoot-role=dns-collector",
		FieldSelector: "metadata.name=" + created.Name,
	})
	if err != nil {
		return "", errors.Wrap(err, "failed to watch pod")
	}
	defer watcher.Stop()

	succeeded := false
watchLoop:
	for event := range watcher.ResultChan() {
		watched, ok := event.Object.(*corev1.Pod)
		// Also filter by name here: not every client honors field selectors.
		if !ok || watched.Name != created.Name {
			continue
		}
		switch watched.Status.Phase {
		case corev1.PodSucceeded:
			succeeded = true
			break watchLoop
		case corev1.PodFailed:
			return "", errors.New("troubleshoot DNS pod failed")
		}
	}

	// The result channel closes when ctx expires or the watch is dropped;
	// without this check we would try to read logs of an unfinished pod.
	if !succeeded {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return "", errors.Wrap(ctxErr, "timed out waiting for troubleshoot DNS pod to complete")
		}
		return "", errors.New("watch closed before troubleshoot DNS pod completed")
	}

	// get pod logs
	podLogOpts := corev1.PodLogOptions{}
	req := client.CoreV1().Pods(namespace).GetLogs(created.Name, &podLogOpts)
	podLogs, err := req.Stream(ctx)
	if err != nil {
		return "", errors.Wrap(err, "failed to get pod logs")
	}
	defer podLogs.Close()

	logBytes, err := io.ReadAll(podLogs)
	if err != nil {
		return "", errors.Wrap(err, "failed to read troubleshoot DNS pod logs")
	}

	return string(logBytes), nil
}
// getCoreDNSConfig returns the "Corefile" entry of the "coredns" config map in
// "kube-system". A lookup failure is logged at verbosity 2 and returned.
func getCoreDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
	cm, err := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{})
	if err == nil {
		return cm.Data["Corefile"], nil
	}

	klog.V(2).Infof("Failed to detect CoreDNS config: %v", err)
	return "", err
}
// getKubeDNSConfig returns the data of the "kube-dns" config map in
// "kube-system" encoded as a JSON object. A config map without data yields an
// empty string and no error; a lookup failure is logged at verbosity 2 and
// returned.
func getKubeDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
	cm, getErr := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
	if getErr != nil {
		klog.V(2).Infof("Failed to detect KubeDNS config: %v", getErr)
		return "", getErr
	}

	entries := cm.Data
	if entries == nil {
		return "", nil
	}

	encoded, marshalErr := json.Marshal(entries)
	if marshalErr != nil {
		return "", marshalErr
	}
	return string(encoded), nil
}
// getRunningKubeDNSPodNames lists pods labelled k8s-app=kube-dns in
// "kube-system" and returns the names of those in the Running phase, joined
// with ", ". A list failure is logged at verbosity 2 and yields "".
func getRunningKubeDNSPodNames(client kubernetes.Interface, ctx context.Context) string {
	listOpts := metav1.ListOptions{
		LabelSelector: "k8s-app=kube-dns",
	}
	podList, err := client.CoreV1().Pods("kube-system").List(ctx, listOpts)
	if err != nil {
		klog.V(2).Infof("failed to list kube-dns pods: %v", err)
		return ""
	}

	var running []string
	for i := range podList.Items {
		candidate := &podList.Items[i]
		if candidate.Status.Phase != corev1.PodRunning {
			continue
		}
		running = append(running, candidate.Name)
	}

	return strings.Join(running, ", ")
}
// getKubeDNSServiceClusterIP returns the ClusterIP of the "kube-dns" service
// in "kube-system". A lookup failure is logged at verbosity 2 and yields "".
func getKubeDNSServiceClusterIP(client kubernetes.Interface, ctx context.Context) string {
	svc, err := client.CoreV1().Services("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
	if err == nil {
		return svc.Spec.ClusterIP
	}

	klog.V(2).Infof("failed to get kube-dns service: %v", err)
	return ""
}
// getKubeDNSEndpoints returns the addresses of the "kube-dns" endpoints in
// "kube-system" as "ip:port" pairs joined with ", ". Each address is paired
// with the first port of its subset; subsets without ports contribute
// nothing. A lookup failure is logged at verbosity 2 and yields "".
func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) string {
	ep, err := client.CoreV1().Endpoints("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
	if err != nil {
		klog.V(2).Infof("failed to get kube-dns endpoints: %v", err)
		return ""
	}

	var formatted []string
	for _, subset := range ep.Subsets {
		// Without a port there is nothing to pair the addresses with.
		if len(subset.Ports) == 0 {
			continue
		}
		firstPort := subset.Ports[0].Port
		for _, addr := range subset.Addresses {
			formatted = append(formatted, fmt.Sprintf("%s:%d", addr.IP, firstPort))
		}
	}

	return strings.Join(formatted, ", ")
}

41
pkg/collect/dns_test.go Normal file
View File

@@ -0,0 +1,41 @@
package collect
import (
"context"
"testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)
// TestGetKubernetesClusterIP checks that getKubernetesClusterIP returns the
// ClusterIP of the "kubernetes" service stored in a fake clientset.
func TestGetKubernetesClusterIP(t *testing.T) {
	const wantIP = "10.0.0.1"

	ctx := context.TODO()
	clientset := fake.NewSimpleClientset()

	// Seed the fake clientset with the service the function looks up.
	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "kubernetes",
			Namespace: "default",
		},
		Spec: corev1.ServiceSpec{
			ClusterIP: wantIP,
		},
	}
	if _, err := clientset.CoreV1().Services("default").Create(ctx, svc, metav1.CreateOptions{}); err != nil {
		t.Fatalf("error injecting service into fake clientset: %v", err)
	}

	gotIP, err := getKubernetesClusterIP(clientset, ctx)
	if err != nil {
		t.Fatalf("error getting cluster IP: %v", err)
	}

	if gotIP != wantIP {
		t.Errorf("expected %s, got %s", wantIP, gotIP)
	}
}

View File

@@ -390,6 +390,20 @@
}
}
},
"dns": {
"type": "object",
"properties": {
"collectorName": {
"type": "string"
},
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"timeout": {
"type": "string"
}
}
},
"exec": {
"type": "object",
"required": [

View File

@@ -2978,6 +2978,20 @@
}
}
},
"dns": {
"type": "object",
"properties": {
"collectorName": {
"type": "string"
},
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"timeout": {
"type": "string"
}
}
},
"exec": {
"type": "object",
"required": [

View File

@@ -3024,6 +3024,20 @@
}
}
},
"dns": {
"type": "object",
"properties": {
"collectorName": {
"type": "string"
},
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"timeout": {
"type": "string"
}
}
},
"exec": {
"type": "object",
"required": [