diff --git a/cmd/kured/Dockerfile b/cmd/kured/Dockerfile index a3f3b57..dba4021 100644 --- a/cmd/kured/Dockerfile +++ b/cmd/kured/Dockerfile @@ -1,5 +1,5 @@ -FROM ubuntu:16.04 -RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/cache/apt +FROM alpine:3.8 +RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/* # NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details ADD https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl /usr/bin/kubectl RUN chmod 0755 /usr/bin/kubectl diff --git a/cmd/kured/main.go b/cmd/kured/main.go index c1b4d96..e5b3d1b 100644 --- a/cmd/kured/main.go +++ b/cmd/kured/main.go @@ -97,16 +97,23 @@ func newCommand(name string, arg ...string) *exec.Cmd { } func sentinelExists() bool { - _, err := os.Stat(rebootSentinel) - switch { - case err == nil: - return true - case os.IsNotExist(err): - return false - default: - log.Fatalf("Unable to determine existence of sentinel: %v", err) - return false // unreachable; prevents compilation error + // Relies on hostPID:true and privileged:true to enter host mount namespace + sentinelCmd := newCommand("/usr/bin/nsenter", "-m/proc/1/ns/mnt", "--", "/usr/bin/test", "-f", rebootSentinel) + if err := sentinelCmd.Run(); err != nil { + switch err := err.(type) { + case *exec.ExitError: + // We assume a non-zero exit code means 'reboot not required', but of course + // the user could have misconfigured the sentinel command or something else + // went wrong during its execution. In that case, not entering a reboot loop + // is the right thing to do, and we are logging stdout/stderr of the command + // so it should be obvious what is wrong. + return false + default: + // Something was grossly misconfigured, such as the command path being wrong. 
+ log.Fatalf("Error invoking sentinel command: %v", err) + } } + return true } func rebootRequired() bool { @@ -205,8 +212,8 @@ func commandReboot(nodeID string) { } } - // Relies on /var/run/dbus/system_bus_socket bind mount to talk to systemd - rebootCmd := newCommand("/bin/systemctl", "reboot") + // Relies on hostPID:true and privileged:true to enter host mount namespace + rebootCmd := newCommand("/usr/bin/nsenter", "-m/proc/1/ns/mnt", "/bin/systemctl", "reboot") if err := rebootCmd.Run(); err != nil { log.Fatalf("Error invoking reboot command: %v", err) } diff --git a/kured-ds.yaml b/kured-ds.yaml index 216acce..ba16402 100644 --- a/kured-ds.yaml +++ b/kured-ds.yaml @@ -25,13 +25,23 @@ spec: tolerations: - key: node-role.kubernetes.io/master effect: NoSchedule + hostPID: true # Facilitate entering the host mount namespace via init + restartPolicy: Always containers: - name: kured image: quay.io/weaveworks/kured imagePullPolicy: IfNotPresent + securityContext: + privileged: true # Give permission to nsenter /proc/1/ns/mnt + env: + # Pass in the name of the node on which this pod is scheduled + # for use with drain/uncordon operations and lock acquisition + - name: KURED_NODE_ID + valueFrom: + fieldRef: + fieldPath: spec.nodeName command: - /usr/bin/kured -# args: # - --alert-filter-regexp=^RebootRequired$ # - --ds-name=kured # - --ds-namespace=kube-system @@ -41,23 +51,3 @@ spec: # - --reboot-sentinel=/var/run/reboot-required # - --slack-hook-url=https://hooks.slack.com/... 
# - --slack-username=prod -# -# NO USER SERVICEABLE PARTS BEYOND THIS POINT - env: - # Pass in the name of the node on which this pod is scheduled - # for use with drain/uncordon operations and lock acquisition - - name: KURED_NODE_ID - valueFrom: - fieldRef: - fieldPath: spec.nodeName - volumeMounts: - # Needed for two purposes: - # * Testing for the existence of /var/run/reboot-required - # * Accessing /var/run/dbus/system_bus_socket to effect reboot - - name: hostrun - mountPath: /var/run - restartPolicy: Always - volumes: - - name: hostrun - hostPath: - path: /var/run