~bsprague/k8s-restart

4eb37062a1766f0afecf95593958c5c2817fa137 — Brandon Sprague 8 months ago 533f5ae main
K8s example, Docker file, watch for deployment
4 files changed, 256 insertions(+), 8 deletions(-)

M .gitignore
A Dockerfile
M README.md
M main.go
M .gitignore => .gitignore +1 -0
@@ 1,3 1,4 @@
/service-mapping.json
/frpc.ini
/k8s-restart
/deployment.yaml

A Dockerfile => Dockerfile +38 -0
@@ 0,0 1,38 @@
# Build with (replace the registry address with your own):
#   docker build -t 192.168.5.3:5000/k8s-restart .

# Stage 1: compile the binary and prepare the files the final image needs.
FROM golang:1.21 AS build

# Create the unprivileged account here, because the scratch-based final stage
# has no tooling of its own; /etc/passwd and /etc/group are copied across below.
RUN adduser \
  --disabled-password \
  --gecos "" \
  --home "/nonexistent" \
  --shell "/sbin/nologin" \
  --no-create-home \
  --uid 65532 \
  noroot

WORKDIR /build

# Copy only the module files first so the dependency download layer is reused
# when just the source changes.
COPY go.mod .
COPY go.sum .

RUN go mod download
RUN go mod verify

COPY main.go .

# CGO is disabled so the binary is statically linked and can run in an image
# with no libc; -s -w strips symbol and debug information.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o k8s-restart -ldflags "-s -w" .

# Stage 2: minimal runtime image containing only the binary and its data files.
FROM scratch

WORKDIR /app

# Time zone data, CA certificates and the user/group databases come from the
# build stage.
COPY --from=build /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=build /etc/passwd /etc/passwd
COPY --from=build /etc/group /etc/group

COPY --from=build /build/k8s-restart .

# Run as the unprivileged user created in the build stage.
USER noroot:noroot

CMD ["/app/k8s-restart"]
\ No newline at end of file

M README.md => README.md +143 -3
@@ 52,9 52,149 @@ There are also `--host_override` and `--scheme_override` flags, which can be use

Note that these overrides only take place if there's no host info in the request.

## Deploying

There's a `Dockerfile` in the repo; you can build an image with:

```bash
docker build -t <registry>/k8s-restart .
```

For deploying in a K8s cluster, you can use something like:

```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: k8s-restart
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: k8s-restart
rules:
- apiGroups: ["apps"]
  resources: ["deployments"]
  verbs:
  - get
  - list
  - patch
  - watch
- apiGroups: ["apps"]
  resources: ["deployments/scale"]
  verbs:
  - update
  - patch
- apiGroups: [""]
  resources: ["pods"]
  verbs:
  - list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: k8s-restart
subjects:
- kind: ServiceAccount
  name: k8s-restart
roleRef:
  kind: Role
  name: k8s-restart
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: k8s-restart-config
data:
  service-mapping.json: |
    {
      "svc1": {
        "namespace": "ns1",
        "deployment_name": "svc1-deployment"
      },
      "svc2": {
        "namespace": "ns2",
        "deployment_name": "svc2-deployment"
      },
      "svc3": {
        "namespace": "ns3",
        "deployment_name": "svc3-deployment"
      }
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: k8s-restart-deployment
  labels:
    app: k8s-restart
spec:
  selector:
    matchLabels:
      app: k8s-restart
  # See https://kubernetes.io/docs/tasks/run-application/run-single-instance-stateful-application/
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: k8s-restart
    spec:
        serviceAccountName: k8s-restart
        containers:
        - image: <registry>/k8s-restart
          name: k8s-restart
          env:
          - name: HOST_OVERRIDE
            value: <your host>
          - name: SCHEME_OVERRIDE
            value: https
          - name: SERVICE_MAPPING
            value: /config/service-mapping.json
          - name: ALLOWED_NUMBER
            value: <your phone number>
          - name: ADDR
            value: ":8080"
          - name: IN_CLUSTER
            value: "true"
          - name: TWILIO_AUTH_TOKEN
            # This secret needs to exist
            valueFrom:
              secretKeyRef:
                name: twilio
                key: auth_token
          volumeMounts:
          - mountPath: /config
            name: config
          ports:
            - containerPort: 8080
              name: web
        volumes:
        - name: config
          configMap:
            name: k8s-restart-config
---
apiVersion: v1
kind: Service
metadata:
  name: k8s-restart
spec:
  selector:
    app: k8s-restart
  ports:
    - name: web
      protocol: TCP
      port: 8080
      targetPort: 8080
```

Make sure to set the `namespace` as appropriate.

## TODO

- [ ] Use TwiML responses to let user know if we actually rebooted or not, as opposed to just guessing
- [ ] Consider waiting/polling for deployment to finish updating
- [ ] Add example K8s configuration for deploying this
- [ ] Investigate whether `watch` is doing what we want here; it seems to return awfully fast.
- [x] Use TwiML responses to let user know if we actually rebooted or not, as opposed to just guessing
- [x] Consider waiting/polling for deployment to finish updating
- [x] Add example K8s configuration for deploying this
  - That should include a minimal Dockerfile/image for packaging this

M main.go => main.go +74 -5
@@ 3,9 3,11 @@ package main
import (
	"bytes"
	"encoding/json"
	"encoding/xml"
	"errors"
	"flag"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"


@@ 24,6 26,8 @@ import (
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/util/homedir"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)



@@ 161,13 165,15 @@ func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Do some more validity checks
	if from != s.allowedNumber {
		log.Printf("non-allowed number %q made request", from)
		http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden)
		// http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden)
		twiMLResponse(w, []byte("you aren't allowed to do that"))
		return
	}

	if !strings.HasPrefix(msg, "reboot: ") {
		log.Printf("message didn't start with 'reboot: ', was %q", msg)
		http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest)
		// http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest)
		twiMLResponse(w, []byte("bad message, fix it and try again"))
		return
	}



@@ 175,7 181,8 @@ func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	svc, ok := s.serviceMap[svcToReboot]
	if !ok {
		log.Printf("unknown service %q", svcToReboot)
		http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest)
		// http.Error(w, http.StatusText(http.StatusBadRequest), http.StatusBadRequest)
		twiMLResponse(w, []byte(fmt.Sprintf("unknown service %q", svcToReboot)))
		return
	}



@@ 195,7 202,8 @@ func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	})
	if err != nil {
		log.Printf("failed to marshal patch: %v", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		// http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		twiMLResponse(w, []byte("uh oh, our patch was bad, aborting"))
		return
	}
	if _, err := client.Patch(


@@ 205,9 213,43 @@ func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
		patch, metav1.PatchOptions{},
	); err != nil {
		log.Printf("failed to patch deployment %q, %q: %v", svc.DeploymentName, svc.Namespace, err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		// http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		twiMLResponse(w, []byte("uh oh, patching the deployment failed, aborting"))
		return
	}
	log.Printf("Issued patch to service %q in namespace %q based on received message %q", svc.DeploymentName, svc.Namespace, msgID)

	// Watch for changes in status
	watch, err := client.Watch(r.Context(), metav1.ListOptions{Watch: true})
	if err != nil {
		log.Printf("failed to wait for deployment to finish %q, %q: %v", svc.DeploymentName, svc.Namespace, err)
		// http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		twiMLResponse(w, []byte("patching succeeded, but unclear if it finished, error watching"))
		return
	}

	for ev := range watch.ResultChan() {
		deployment, ok := ev.Object.(*appsv1.Deployment)
		if !ok {
			continue
		}
		// check if the deployment is available
		for _, condition := range deployment.Status.Conditions {
			if condition.Type != appsv1.DeploymentAvailable {
				continue
			}
			if condition.Status != corev1.ConditionTrue {
				continue
			}
			log.Printf("Finished deploying service %q in namespace %q based on received message %q", svc.DeploymentName, svc.Namespace, msgID)
			watch.Stop()
			twiMLResponse(w, []byte(fmt.Sprintf("%s restarted successfully!", svcToReboot)))
			return
		}
	}

	log.Printf("failed to wait for deployment to finish %q, %q, probably timed out?", svc.DeploymentName, svc.Namespace)
	twiMLResponse(w, []byte("patching succeeded, but unclear if it finished, timed out"))
}

func formatNumber(phoneNumber string) (string, error) {


@@ 257,3 299,30 @@ func loadServiceMap(p string) (map[string]restartableService, error) {
	}
	return serviceMap, nil
}

// head and foot are the fixed TwiML envelope around a single message body.
// The XML-escaped message text is written between them.
const (
	head = `<?xml version="1.0" encoding="UTF-8"?>
<Response>
    <Message><Body>`
	foot = `</Body></Message>
</Response>`
)

// twiMLResponse writes msg to w as a TwiML <Response> containing one
// <Message><Body> element, with msg XML-escaped.
//
// The document is rendered into memory first and sent with a single write, so
// a failure never leaves a truncated XML document on the wire. Failures are
// logged rather than returned.
func twiMLResponse(w http.ResponseWriter, msg []byte) {
	var buf bytes.Buffer
	buf.Grow(len(head) + len(msg) + len(foot))

	buf.WriteString(head)
	if err := xml.EscapeText(&buf, msg); err != nil {
		// Nothing has been sent yet, so a plain error status is still possible.
		log.Printf("failed to write msg TwiML: %v", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	}
	buf.WriteString(foot)

	// Set, not Add: Content-Type is single-valued, and Add would leave a
	// duplicate header if one had already been set on w.
	w.Header().Set("Content-Type", "application/xml")

	if _, err := io.Copy(w, &buf); err != nil {
		log.Printf("failed to write TwiML response: %v", err)
	}
}