~bitfehler/k8s.sr.ht

e36b6b294512e245dca6309454360a3320c1687b — Conrad Hoffmann 1 year, 2 months ago
Add basic declarations for meta and builds

The containers could be much improved upon, but for now they keep things
nicely compatible with the current packaging approach, which we might want
to keep until we've migrated everything over.
A services/README => services/README +3 -0
@@ 0,0 1,3 @@
K8s declarations for sr.ht services

WARNING: this is all still experimental and subject to change

A services/builds.sr.ht-ssh/Dockerfile => services/builds.sr.ht-ssh/Dockerfile +17 -0
@@ 0,0 1,17 @@
FROM alpine:3.17

RUN sed -i "1s/^/https:\/\/mirror.sr.ht\/alpine\/v3.17\/sr.ht\n/" /etc/apk/repositories
RUN wget -q -O /etc/apk/keys/alpine@sr.ht.rsa.pub https://mirror.sr.ht/alpine/alpine@sr.ht.rsa.pub

# TODO: starting w/ alpine 3.18, envsubst was moved to gettext-envsubst
RUN apk add --no-cache builds.sr.ht-worker git.sr.ht gettext doas openssh shadow

RUN usermod -p '*' builds && mkdir -p /home/builds && chown builds:builds /home/builds && usermod -s /bin/ash builds
RUN mkdir -p /run/builds.sr.ht/ssh
RUN mkdir -p /etc/sr.ht
WORKDIR /run/builds.sr.ht/ssh

COPY docker-entrypoint.sh /usr/local/bin/
COPY runner-shell /usr/bin/
COPY sshd_config /etc/ssh/
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

A services/builds.sr.ht-ssh/docker-entrypoint.sh => services/builds.sr.ht-ssh/docker-entrypoint.sh +9 -0
@@ 0,0 1,9 @@
#!/bin/sh
set -e

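# Render the runtime config: the pod mounts a config.ini template at
# /etc/sr.ht-k8s/ and supplies secrets (e.g. BUILDSRHT_DB_PASS) as
# environment variables for envsubst to expand.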
envsubst < /etc/sr.ht-k8s/config.ini > /etc/sr.ht/config.ini

# TODO: persist host keys instead of regenerating them on every container start
ssh-keygen -A

exec /usr/sbin/sshd -D -e

A services/builds.sr.ht-ssh/pod.yaml => services/builds.sr.ht-ssh/pod.yaml +43 -0
@@ 0,0 1,43 @@
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: buildsrht-ssh
  name: buildsrht-ssh-test
spec:
  containers:
  - image: registry.gensokyo:5000/builds.sr.ht-ssh:latest
    name: buildsrht-ssh
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht-k8s/"
      readOnly: true
    env:
      - name: BUILDSRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.builds

---
apiVersion: v1
kind: Service
metadata:
  name: buildsrht-ssh
spec:
  selector:
    app: buildsrht-ssh
  ports:
    - protocol: TCP
      port: 22
      targetPort: 22
  externalIPs:
    - 46.23.81.204

A services/builds.sr.ht-ssh/runner-shell => services/builds.sr.ht-ssh/runner-shell +124 -0
@@ 0,0 1,124 @@
#!/usr/bin/env python3
from buildsrht.manifest import Manifest
from datetime import datetime
from humanize import naturaltime
from srht.config import cfg, get_origin
from srht.redis import redis
import os
import requests
import shlex
import subprocess
import sys
import time
import yaml

def fail(reason):
    owner = cfg("sr.ht", "owner-name")
    email = cfg("sr.ht", "owner-email")
    print(reason)
    print(f"Please reach out to {owner} <{email}> for support.")
    sys.exit(1)

cmd = os.environ.get("SSH_ORIGINAL_COMMAND") or ""
cmd = shlex.split(cmd)
if len(cmd) < 2:
    fail("Usage: ssh ... connect <job ID>")
op = cmd[0]
if op not in ["connect", "tail"]:
    fail("Usage: ssh ... connect <job ID>")
job_id = int(cmd[1])
cmd = cmd[2:]

bind_address = cfg("builds.sr.ht::worker", "name", "0.0.0.0:8080")
k8s_port_prefix = cfg("builds.sr.ht::worker", "k8s-port-prefix", "")
k8s_jobs_namespace = cfg("builds.sr.ht::worker", "k8s-jobs-namespace", "")

def get_info(job_id):
    r = requests.get(f"http://{bind_address}/job/{job_id}/info")
    if r.status_code != 200:
        return None
    return r.json()

info = get_info(job_id)
if not info:
    fail("No such job found.")

username = sys.argv[1]
if username != info["username"]:
    fail("You are not permitted to connect to this job.")

if len(cmd) == 0:
    url = f"{get_origin('builds.sr.ht', external=True)}/~{username}/job/{job_id}"
    print(f"Connected to build job #{job_id} ({info['status']}): {url}")
deadline = datetime.utcfromtimestamp(info["deadline"])

manifest = Manifest(yaml.safe_load(info["manifest"]))

def connect(job_id, info):
    """Opens a shell on the build VM"""
    limit = naturaltime(datetime.utcnow() - deadline)
    if len(cmd) == 0:
        print(f"Your VM will be terminated {limit}, or when you log out.")
        print()
    requests.post(f"http://{bind_address}/job/{job_id}/claim")
    sys.stdout.flush()
    sys.stderr.flush()
    try:
        tty = os.open("/dev/tty", os.O_RDWR)
        os.dup2(0, tty)
    except OSError:
        pass # non-interactive
    redis.incr(f"builds.sr.ht-shell-{job_id}")
    port = str(info["port"])
    ssh = [
        "ssh", "-qt",
        "-o", "UserKnownHostsFile=/dev/null",
        "-o", "StrictHostKeyChecking=no",
        "-o", "LogLevel=quiet",
    ]
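    # In k8s mode each job sits behind a per-job Service named
    # <k8s-port-prefix><port> in the jobs namespace (created by the
    # image-control script), so we connect to it by DNS name; otherwise
    # the VM's SSH port is forwarded on localhost.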
    if k8s_port_prefix:
        target = f"build@{k8s_port_prefix}{port}"
        if k8s_jobs_namespace:
            target = f"{target}.{k8s_jobs_namespace}"
        ssh += [target]
    else:
        ssh += ["-p", port, "build@localhost"]

    subprocess.call(ssh + cmd)
    n = redis.decr(f"builds.sr.ht-shell-{job_id}")
    if n == 0:
        requests.post(f"http://{bind_address}/job/{job_id}/terminate")

def tail(job_id, info):
    """Tails the build logs to stdout"""
    logs = os.path.join(cfg("builds.sr.ht::worker", "buildlogs"), str(job_id))
    p = subprocess.Popen(["tail", "-f", os.path.join(logs, "log")])
    tasks = set()
    procs = [p]
    # holy bejeezus this is hacky
    while True:
        for task in manifest.tasks:
            if task.name in tasks:
                continue
            path = os.path.join(logs, task.name, "log")
            if os.path.exists(path):
                procs.append(subprocess.Popen(
                    f"tail -f {shlex.quote(path)} | " +
                    "awk '{ print \"[" + shlex.quote(task.name) + "] \" $0 }'",
                    shell=True))
                tasks.add(task.name)
        info = get_info(job_id)
        if not info:
            break
        if info["task"] == info["tasks"]:
            for p in procs:
                p.kill()
            break
        time.sleep(3)

if op == "connect":
    if info["task"] != info["tasks"] and info["status"] == "running":
        tail(job_id, info)
    connect(job_id, info)
elif op == "tail":
    tail(job_id, info)

A services/builds.sr.ht-ssh/sshd_config => services/builds.sr.ht-ssh/sshd_config +119 -0
@@ 0,0 1,119 @@
#	$OpenBSD: sshd_config,v 1.103 2018/04/09 20:41:22 tj Exp $

# This is the sshd server system-wide configuration file.  See
# sshd_config(5) for more information.

# This sshd was compiled with PATH=/bin:/usr/bin:/sbin:/usr/sbin

# The strategy used for options in the default sshd_config shipped with
# OpenSSH is to specify options with their default value where
# possible, but leave them commented.  Uncommented options override the
# default value.

#Port 22
#AddressFamily any
#ListenAddress 0.0.0.0
#ListenAddress ::

#HostKey /etc/ssh/ssh_host_rsa_key
#HostKey /etc/ssh/ssh_host_ecdsa_key
#HostKey /etc/ssh/ssh_host_ed25519_key

# Ciphers and keying
#RekeyLimit default none

# Logging
#SyslogFacility AUTH
#LogLevel INFO

# Authentication:

#LoginGraceTime 2m
#PermitRootLogin prohibit-password
PermitRootLogin no
#StrictModes yes
#MaxAuthTries 6
#MaxSessions 10

#PubkeyAuthentication yes

# The default is to check both .ssh/authorized_keys and .ssh/authorized_keys2
# but this is overridden so installations will only check .ssh/authorized_keys
AuthorizedKeysFile	.ssh/authorized_keys

#AuthorizedPrincipalsFile none

AuthorizedKeysCommand /usr/bin/gitsrht-dispatch "%u" "%h" "%t" "%k"
AuthorizedKeysCommandUser root

# For this to work you will also need host keys in /etc/ssh/ssh_known_hosts
#HostbasedAuthentication no
# Change to yes if you don't trust ~/.ssh/known_hosts for
# HostbasedAuthentication
#IgnoreUserKnownHosts no
# Don't read the user's ~/.rhosts and ~/.shosts files
#IgnoreRhosts yes

# To disable tunneled clear text passwords, change to no here!
#PasswordAuthentication yes
PasswordAuthentication no
#PermitEmptyPasswords no

# Change to no to disable s/key passwords
#ChallengeResponseAuthentication yes

# Kerberos options
#KerberosAuthentication no
#KerberosOrLocalPasswd yes
#KerberosTicketCleanup yes
#KerberosGetAFSToken no

# GSSAPI options
#GSSAPIAuthentication no
#GSSAPICleanupCredentials yes

# Set this to 'yes' to enable PAM authentication, account processing,
# and session processing. If this is enabled, PAM authentication will
# be allowed through the ChallengeResponseAuthentication and
# PasswordAuthentication.  Depending on your PAM configuration,
# PAM authentication via ChallengeResponseAuthentication may bypass
# the setting of "PermitRootLogin without-password".
# If you just want the PAM account and session checks to run without
# PAM authentication, then enable this but set PasswordAuthentication
# and ChallengeResponseAuthentication to 'no'.
#UsePAM no

#AllowAgentForwarding yes
# Feel free to re-enable these if your use case requires them.
AllowTcpForwarding no
GatewayPorts no
X11Forwarding no
#X11DisplayOffset 10
#X11UseLocalhost yes
#PermitTTY yes
#PrintMotd yes
#PrintLastLog yes
#TCPKeepAlive yes
#PermitUserEnvironment no
#Compression delayed
#ClientAliveInterval 0
#ClientAliveCountMax 3
#UseDNS no
#PidFile /run/sshd.pid
#MaxStartups 10:30:100
#PermitTunnel no
#ChrootDirectory none
#VersionAddendum none

# no default banner path
#Banner none

# override default of no subsystems
Subsystem	sftp	/usr/lib/ssh/sftp-server

# Example of overriding settings on a per-user basis
#Match User anoncvs
#	X11Forwarding no
#	AllowTcpForwarding no
#	PermitTTY no
#	ForceCommand cvs server

A services/builds.sr.ht-worker/Dockerfile => services/builds.sr.ht-worker/Dockerfile +17 -0
@@ 0,0 1,17 @@
FROM alpine:edge

RUN sed -i "1s/^/https:\/\/mirror.sr.ht\/alpine\/v3.17\/sr.ht\n/" /etc/apk/repositories
RUN wget -q -O /etc/apk/keys/alpine@sr.ht.rsa.pub https://mirror.sr.ht/alpine/alpine@sr.ht.rsa.pub

# TODO: starting w/ alpine 3.18, envsubst was moved to gettext-envsubst
RUN apk add --no-cache builds.sr.ht-worker gettext kubectl doas openssh-client
RUN echo "permit nopass keepenv root cmd /usr/bin/builds.sr.ht-worker" >> /etc/doas.d/doas.conf

RUN mkdir /run/builds.sr.ht && chown builds:builds /run/builds.sr.ht
WORKDIR /run/builds.sr.ht

COPY docker-entrypoint.sh /usr/local/bin/
COPY control /var/lib/images/
COPY worker /usr/bin/builds.sr.ht-worker
COPY image-control.conf /etc/
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

A services/builds.sr.ht-worker/control => services/builds.sr.ht-worker/control +357 -0
@@ 0,0 1,357 @@
#!/bin/sh -eu
self=$(readlink -f "$0")
self=$(dirname "$self")
# The actual images might live in a different place than this script and the
# metadata. If so, $images should be configured via /etc/image-control.conf.
images="$self"

if [ -f /etc/image-control.conf ]
then
	. /etc/image-control.conf
fi

base=$1
cmd=$2
shift 2

if [ ! -d "$self/$base" ]
then
	printf "Image '%s' does not exist\n" "$base" >&2
	exit 1
fi

ssh_opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
guestport=22

guest_ssh() {
	if [ "$default_means" = "k8s" ]; then
		# Pretty horrible, but should do until all this gets ported to Go
		ssh -p 22 $ssh_opts -o "Hostname=builds-port-${port}.build-jobs" "$@"
	else
		ssh $ssh_opts "$@"
	fi
}

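# Print QEMU CPU/machine flags for the requested architecture. When the
# target matches the host and /dev/kvm is available (assumed under k8s,
# where a device plugin provides it), KVM acceleration is enabled.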
cpu_opts() {
	if [ "$(uname -m)" = "$1" ]
	then
		if [ -e /dev/kvm ] || [ "$default_means" = "k8s" ]
		then
			printf "%s" "-cpu host -enable-kvm"
			return
		fi
	fi
	case "$1" in
		aarch64)
			printf "%s" "-M virt -cpu cortex-a53"
			;;
		x86_64)
			printf "%s" "-cpu qemu64"
			;;
		ppc64le)
			printf "%s" "-cpu power9 -machine pseries"
			;;
		*)
			printf "Unsupported architecture '%s'\n" "$arch" >&2
			exit 1
			;;
	esac
}

_k8s_boot() {
# The following variables have to be set in /etc/image-control.conf:
#
# - $default_means: must be "k8s"
# - $images: path where the actual build images are mounted
# - $k8s_port_prefix: must match `k8s-port-prefix` in the
#   [builds.sr.ht::worker] section of config.ini
# - $k8s_jobs_namespace: must match `k8s-jobs-namespace` in the
#   [builds.sr.ht::worker] section of config.ini
# - $k8s_qemu_image_ref: reference to the QEMU docker image, e.g.
#   registry.example.org/qemu:latest
# - $k8s_build_images_pvc: name of the persistent volume claim for a volume
#   containing the actual build images
# - $k8s_kvm_resource: the name under which a device plugin makes the host's
#   /dev/kvm device available
#
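# An example configuration ships in this repository as
# services/builds.sr.ht-worker/image-control.conf.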
	port_name="${k8s_port_prefix}${port}"
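	# Render the Job + Service manifest, keep a copy in /tmp so cmd_cleanup
	# can later delete exactly these resources, then apply it. Any extra
	# arguments are appended verbatim to the QEMU command list.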
	cat <<EOF | tee /tmp/k8s-$port.id | kubectl apply -f - > /dev/null
apiVersion: batch/v1
kind: Job
metadata:
  name: ${port_name}
  namespace: ${k8s_jobs_namespace}
  labels:
    job: ${port_name}
spec:
  template:
    metadata:
      labels:
        job: ${port_name}
    spec:
      containers:
      - name: qemu
        image: ${k8s_qemu_image_ref}
        command:
        - "/bin/${qemu:-qemu-system-$arch}"
        - "-m"
        - "${MEMORY:-4096}"
        - "-smp"
        - "cpus=2"
        - "-net"
        - "nic,model=virtio"
        - "-net"
        - "user,hostfwd=tcp::22-:$guestport"
        - "-display"
        - "none"
        - "-device"
        - "virtio-rng-pci"
        - "-device"
        - "virtio-balloon"
        - "-drive"
        - "file=$wd/$arch/root.img.qcow2,media=disk,snapshot=on,${driveopts:-if=virtio}"
$(for arg; do printf "        - \"$arg\"\n"; done)
        volumeMounts:
          - name: build-images
            mountPath: /var/lib/images
          - name: tmp
            mountPath: /var/tmp
        resources:
          limits:
            ${k8s_kvm_resource}: 1
      volumes:
        - name: build-images
          persistentVolumeClaim:
            claimName: ${k8s_build_images_pvc}
            readOnly: false
        - name: tmp
          emptyDir:
            medium: Memory
            sizeLimit: 2Gi
      restartPolicy: Never
  backoffLimit: 0
---
apiVersion: v1
kind: Service
metadata:
  name: ${port_name}
  namespace: ${k8s_jobs_namespace}
spec:
  selector:
    job: ${port_name}
  ports:
    - protocol: TCP
      port: 22
      targetPort: 22
EOF
}

_docker_boot() {
	docker run -d \
		-v "$self/$base":/base:ro \
		--mount type=tmpfs,destination=/var/tmp \
		--device /dev/kvm \
		--cpus=2 \
		-p 127.0.0.1:$port:$port \
		--rm \
		--name "builds_job_${BUILD_JOB_ID:-unknown_$(date +"%s")}" \
		qemu /bin/${qemu:-qemu-system-$arch} \
		-m ${MEMORY:-4096} \
		-smp cpus=2 \
		-net nic,model=virtio -net user,hostfwd=tcp::$port-:$guestport \
		-display none \
		-device virtio-rng-pci \
		-device virtio-balloon \
		-drive file="$wd/$arch/root.img.qcow2",media=disk,snapshot=on,${driveopts:-if=virtio} \
		"$@" > /tmp/docker-$port.id
}

_qemu_boot() {
	${qemu:-qemu-system-$arch} \
		-pidfile /tmp/qemu-$port.id \
		-m ${MEMORY:-4096} \
		-smp cpus=2 \
		-net nic,model=virtio -net user,hostfwd=tcp:127.0.0.1:$port-:$guestport \
		-display none \
		-device virtio-rng-pci \
		-device virtio-balloon \
		-drive file="$wd/$arch/root.img.qcow2",media=disk,snapshot=on,${driveopts:-if=virtio} \
		"$@" &
}

_qemu_chroot_boot() {
	qemu-chroot \
		-p /tmp/qemu-$port.id \
		-b "$self/$base":/base \
		-b /var/tmp:/var/tmp \
		/bin/${qemu:-qemu-system-$arch} \
		-m ${MEMORY:-4096} \
		-smp cpus=2 \
		-net nic,model=virtio -net user,hostfwd=tcp:127.0.0.1:$port-:$guestport \
		-display none \
		-device virtio-rng-pci \
		-device virtio-balloon \
		-drive file="$wd/$arch/root.img.qcow2",media=disk,snapshot=on,${driveopts:-if=virtio} \
		"$@" &
}

_qemu_kvm_boot() {
	qemu-kvm \
		-pidfile /tmp/qemu-$port.id \
		-m ${MEMORY:-4096} \
		-smp cpus=2 \
		-net nic,model=virtio -net user,hostfwd=tcp:127.0.0.1:$port-:$guestport \
		-display none \
		-device virtio-rng-pci \
		-device virtio-balloon \
		-drive file="$wd/$arch/root.img.qcow2",media=disk,snapshot=on,${driveopts:-if=virtio} \
		"$@" &
}

_boot() {
	if [ "$means" = "docker" ]
	then
		_docker_boot "$@"
	elif [ "$means" = "k8s" ]
	then
		_k8s_boot "$@"
	elif [ "$means" = "qemu" ]
	then
		_qemu_boot "$@"
	elif [ "$means" = "qemu-chroot" ]
	then
		_qemu_chroot_boot "$@"
	elif [ "$means" = "qemu-kvm" ]
	then
		_qemu_kvm_boot "$@"
	fi
}

cmd_boot() {
	arch=$1
	shift

	if [ "$arch" = "default" ]
	then
		arch="$default_arch"
	fi
	if [ ! -e "$images/$base/$arch/root.img.qcow2" ]
	then
		printf "Image '%s' is not available for arch '%s'\n" "$base" "$arch" >&2
		exit 1
	fi

	port=$1
	if [ "$#" -gt 1 ]
	then
		means=$2
	else
		means="${default_means:-docker}"
	fi

	if [ "$means" = "docker" ]
	then
		wd="/base"
	elif [ "$means" = "k8s" ]
	then
		wd="/var/lib/images/$base"
	elif [ "$means" = "qemu" ]
	then
		wd="$self/$base"
	elif [ "$means" = "qemu-chroot" ]
	then
		wd="/base"
	elif [ "$means" = "qemu-kvm" ]
	then
		wd="$self/$base"
	else
		printf "Unknown boot mode '%s'\n" "$means" >&2
		exit 1
	fi

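	# `boot` itself is not defined in this script; it comes from the
	# per-image `functions` file sourced near the bottom.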
	boot
}

_wait_boot() {
	port=$1
	attempts=0
	echo "Waiting for VM to come up..."
	while ! guest_ssh -p "$port" build@localhost echo Hello world >/dev/null 2>&1
	do
		sleep 5
		attempts=$((attempts + 1))
		if [ "$attempts" -eq 20 ]
		then
			echo "Giving up."
			cmd_cleanup "$port"
			exit 1
		fi
		echo "Attempt $attempts..."
	done
}

cmd_cleanup() {
	port=$1
	# Power off
	if [ "$#" -eq 1 ]
	then
		if [ -e /tmp/docker-$port.id ]
		then
			cid=$(cat /tmp/docker-$port.id)
			guest_ssh -p $port build@localhost $poweroff_cmd || true
			sleep 2
			docker kill $cid && sleep 2 || true
			rm /tmp/docker-$port.id
		fi
		if [ -e /tmp/qemu-$port.id ]
		then
			cid=$(cat /tmp/qemu-$port.id)
			guest_ssh -p $port build@localhost $poweroff_cmd || true
			sleep 2
			kill $cid || true
			rm -f /tmp/qemu-$port.id
		fi
		if [ -e /tmp/k8s-$port.id ]
		then
			guest_ssh -p $port build@localhost $poweroff_cmd || true
			sleep 2
			kubectl delete --timeout=5s --ignore-not-found=true -f /tmp/k8s-$port.id || true
			rm -f /tmp/k8s-$port.id
		fi
	fi
}

if ! [ -e "$self/$base/functions" ]
then
	printf "Missing base image functions '%s'\n" "$base" >&2
	exit 1
fi

. "$self/$base/functions"

case "$cmd" in
	boot)
		cmd_boot "$@"
		;;
	cleanup)
		cmd_cleanup "$@"
		;;
	sanity-check)
		sanity_check "$@"
		;;
	install)
		install "$@"
		;;
	add-repo)
		add_repository "$@"
		;;
	ssh)
		port=$1
		shift
		guest_ssh -p "$port" build@localhost "$@"
		;;
	*)
		printf "Unknown command '%s'\n" "$cmd" >&2
		exit 1
		;;
esac

A services/builds.sr.ht-worker/docker-entrypoint.sh => services/builds.sr.ht-worker/docker-entrypoint.sh +6 -0
@@ 0,0 1,6 @@
#!/bin/sh
set -e

envsubst < /etc/sr.ht/config.ini > config.ini

exec doas -u builds /usr/bin/builds.sr.ht-worker -workers 10 -config config.ini

A services/builds.sr.ht-worker/ext.yaml => services/builds.sr.ht-worker/ext.yaml +14 -0
@@ 0,0 1,14 @@
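# Test-only: exposes the SSH service of a single build job (builds-port-22115)
# on an external IP.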
apiVersion: v1
kind: Service
metadata:
  name: ext-test
  namespace: build-jobs
spec:
  ports:
  - port: 22
    protocol: TCP
    targetPort: 22115
  selector:
    job: builds-port-22115
  externalIPs:
    - 46.23.81.204

A services/builds.sr.ht-worker/image-control.conf => services/builds.sr.ht-worker/image-control.conf +7 -0
@@ 0,0 1,7 @@
default_means="k8s"
images="/run/builds.sr.ht/images"
k8s_port_prefix="builds-port-"
k8s_jobs_namespace="build-jobs"
k8s_qemu_image_ref="registry.gensokyo:5000/qemu:latest"
k8s_build_images_pvc="cephfs-build-images-worker-pvc"
k8s_kvm_resource="sr.ht/kvm"

A services/builds.sr.ht-worker/jobs.yaml => services/builds.sr.ht-worker/jobs.yaml +33 -0
@@ 0,0 1,33 @@
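# Namespace for the build job VMs, plus a Role/RoleBinding that grants the
# worker's build-runner service account (in the default namespace) permission
# to manage Jobs, Pods, and Services in it.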
apiVersion: v1
kind: Namespace
metadata:
  name: build-jobs

---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: build-jobs-manager
  namespace: build-jobs
rules:
- apiGroups: [""]
  resources: ["pods", "services"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["batch", "extensions"]
  resources: ["jobs"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]

---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: build-runner-job-manager
  namespace: build-jobs
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: build-jobs-manager
subjects:
- kind: ServiceAccount
  name: build-runner
  namespace: default

A services/builds.sr.ht-worker/kvm.yaml => services/builds.sr.ht-worker/kvm.yaml +28 -0
@@ 0,0 1,28 @@
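# Exposes the host's /dev/kvm to pods as a hostPath-backed PersistentVolume
# with a matching claim.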
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-kvm
spec:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 1Gi
  hostPath:
    path: /dev/kvm
    type: CharDevice
  persistentVolumeReclaimPolicy: Retain
---

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-kvm
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  # volumeName must match the PV name
  volumeName: pv-kvm
  storageClassName: ""

A services/builds.sr.ht-worker/pod.yaml => services/builds.sr.ht-worker/pod.yaml +50 -0
@@ 0,0 1,50 @@
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: buildsrht-worker
  name: buildsrht-worker-test
spec:
  serviceAccountName: build-runner
  containers:
  - args: ["web"]
    image: registry.gensokyo:5000/builds.sr.ht-worker:latest
    name: buildsrht-worker
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht/"
      readOnly: true
    - name: build-images
      mountPath: "/run/builds.sr.ht/images/"
      readOnly: true
    env:
      - name: BUILDSRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.builds
  - name: build-images
    persistentVolumeClaim:
      claimName: cephfs-build-images-pvc
      readOnly: true

---
apiVersion: v1
kind: Service
metadata:
  name: buildsrht-runner
spec:
  selector:
    app: buildsrht-worker
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080

A services/builds.sr.ht-worker/pv_build_images.yaml => services/builds.sr.ht-worker/pv_build_images.yaml +29 -0
@@ 0,0 1,29 @@
# This is the same as cephfs-build-images-pv. Even though PVs are not
# namespaced, a PV can only be claimed from one namespace, so this simply
# creates another PV, which then gets claimed in the build-jobs namespace.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: cephfs-build-images-worker-pv
spec:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 300Gi
  csi:
    driver: cephfs.csi.ceph.com
    nodeStageSecretRef:
      name: csi-cephfs-build-images-secret
      namespace: ceph
    volumeAttributes:
      # Required options from the storageclass parameters need to be added to volumeAttributes
      "clusterID": "da256dbb-b2c3-4570-ad6f-7712693a547c"
      "fsName": "cephfs"
      "staticVolume": "true"
      "rootPath": /volumes/csi/build-images/4eb739fe-81ab-4536-9d2c-fe196c37dc53
    # volumeHandle can be anything; it need not be the same as the PV name
    # or the volume name. It is kept the same here for brevity.
    volumeHandle: cephfs-build-images-worker-pv
  persistentVolumeReclaimPolicy: Retain
  volumeMode: Filesystem
  storageClassName: sc-cephfs

A services/builds.sr.ht-worker/pvc_build_images.yaml => services/builds.sr.ht-worker/pvc_build_images.yaml +15 -0
@@ 0,0 1,15 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: cephfs-build-images-worker-pvc
  namespace: build-jobs
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 300Gi
  volumeMode: Filesystem
  # volumeName must match the PV name
  volumeName: cephfs-build-images-worker-pv
  storageClassName: sc-cephfs

A services/builds.sr.ht-worker/sa.yaml => services/builds.sr.ht-worker/sa.yaml +4 -0
@@ 0,0 1,4 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: build-runner

A services/builds.sr.ht/Dockerfile => services/builds.sr.ht/Dockerfile +14 -0
@@ 0,0 1,14 @@
FROM alpine:3.17

RUN sed -i "1s/^/https:\/\/mirror.sr.ht\/alpine\/v3.17\/sr.ht\n/" /etc/apk/repositories
RUN wget -q -O /etc/apk/keys/alpine@sr.ht.rsa.pub https://mirror.sr.ht/alpine/alpine@sr.ht.rsa.pub

# TODO: starting w/ alpine 3.18, envsubst was moved to gettext-envsubst
RUN apk add --no-cache builds.sr.ht gettext

RUN mkdir /var/run/builds.sr.ht && chown builds:builds /var/run/builds.sr.ht
WORKDIR /var/run/builds.sr.ht

COPY docker-entrypoint.sh /usr/local/bin/
COPY jobs.py /usr/lib/python3.10/site-packages/buildsrht/blueprints/
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

A services/builds.sr.ht/docker-entrypoint.sh => services/builds.sr.ht/docker-entrypoint.sh +12 -0
@@ 0,0 1,12 @@
#!/bin/sh
set -e

envsubst < /etc/sr.ht/config.ini > config.ini

if [ "$1" == "web" ]; then
	exec env PORT=5002 /usr/bin/gunicorn buildsrht.app:app
elif [ "$1" == "api" ]; then
	exec /usr/bin/buildssrht-api
fi
echo "error: first argument must be 'web' or 'api'"
exit 1

A services/builds.sr.ht/jobs.py => services/builds.sr.ht/jobs.py +509 -0
@@ 0,0 1,509 @@
from ansi2html import Ansi2HTMLConverter
from buildsrht.manifest import Manifest
from buildsrht.rss import generate_feed
from buildsrht.runner import submit_build, requires_payment
from buildsrht.search import apply_search
from buildsrht.types import Job, JobStatus, Task, TaskStatus, User, Visibility
from datetime import datetime, timedelta
from flask import Blueprint, render_template, request, abort, redirect
from flask import Response, url_for
from markupsafe import Markup, escape
from prometheus_client import Counter
from srht.cache import get_cache, set_cache
from srht.config import cfg, get_origin
from srht.crypto import encrypt_request_authorization
from srht.database import db
from srht.flask import paginate_query, session
from srht.oauth import current_user, loginrequired, UserType
from srht.redis import redis
from srht.validation import Validation
import sqlalchemy as sa
import hashlib
import requests
import yaml
import json
import textwrap

jobs = Blueprint("jobs", __name__)

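# Build an ad-hoc namespace class whose attributes are the counters below,
# keyed by metric name (e.g. metrics.buildsrht_logcache_hit).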
metrics = type("metrics", tuple(), {
    c.describe()[0].name: c
    for c in [
        Counter("buildsrht_logcache_hit", "Number of log cache hits"),
        Counter("buildsrht_logcache_miss", "Number of log cache misses"),
    ]
})

requests_session = requests.Session()

def get_access(job, user=None):
    user = user or current_user

    # Anonymous
    if not user:
        if job.visibility == Visibility.PRIVATE:
            return False
        return True

    # Owner
    if user.id == job.owner_id:
        return True

    if job.visibility == Visibility.PRIVATE:
        return False
    return True

def tags(tags):
    if not tags:
        return list()
    previous = list()
    results = list()
    for tag in tags.split("/"):
        results.append({
            "name": tag,
            "url": "/" + "/".join(previous + [tag])
        })
        previous.append(tag)
    return results

status_map = {
    JobStatus.pending: "text-info",
    JobStatus.queued: "text-info",
    JobStatus.success: "text-success",
    JobStatus.failed: "text-danger",
    JobStatus.running: "text-info icon-spin",
    JobStatus.timeout: "text-danger",
    JobStatus.cancelled: "text-warning",
    TaskStatus.success: "text-success",
    TaskStatus.failed: "text-danger",
    TaskStatus.running: "text-primary icon-spin",
    TaskStatus.pending: "text-info",
    TaskStatus.skipped: "text-muted",
}

icon_map = {
    JobStatus.pending: "clock",
    JobStatus.queued: "clock",
    JobStatus.success: "check",
    JobStatus.failed: "times",
    JobStatus.running: "circle-notch",
    JobStatus.timeout: "clock",
    JobStatus.cancelled: "times",
    TaskStatus.success: "check",
    TaskStatus.failed: "times",
    TaskStatus.running: "circle-notch",
    TaskStatus.pending: "circle",
    TaskStatus.skipped: "minus",
}

def get_jobs(jobs, terms):
    jobs = jobs.order_by(Job.created.desc())
    if terms:
        jobs = apply_search(jobs, terms)
    return jobs

def jobs_for_feed(jobs):
    terms = request.args.get("search")
    try:
        jobs = get_jobs(jobs, terms)
    except ValueError:
        jobs = jobs.filter(False)

    if terms is not None and "status:" not in terms:
        # by default, return only terminated jobs in feed
        terminated_statuses = [
            JobStatus.success,
            JobStatus.cancelled,
            JobStatus.failed,
            JobStatus.timeout,
        ]
        jobs = jobs.filter(Job.status.in_(terminated_statuses))
    return jobs, terms

def jobs_page(jobs, sidebar="sidebar.html", **kwargs):
    search = request.args.get("search")
    search_error = None

    try:
        jobs = get_jobs(jobs, search)
    except ValueError as ex:
        search_error = str(ex)

    jobs = jobs.options(sa.orm.joinedload(Job.tasks))
    jobs, pagination = paginate_query(jobs)
    return render_template("jobs.html",
        jobs=jobs, status_map=status_map, icon_map=icon_map, tags=tags,
        sort_tasks=lambda tasks: sorted(tasks, key=lambda t: t.id),
        sidebar=sidebar, search=search, search_error=search_error,
        **pagination, **kwargs)

def jobs_feed(jobs, title, endpoint, **urlvalues):
    jobs, terms = jobs_for_feed(jobs)
    if terms is not None:
        title = f"{title} (filtered by: {terms})"
    description = title
    origin = cfg("builds.sr.ht", "origin")
    assert "search" not in urlvalues
    if terms is not None:
        urlvalues["search"] = terms
    link = origin + url_for(endpoint, **urlvalues)
    jobs = jobs.options(sa.orm.joinedload(Job.owner))
    return generate_feed(jobs, title, link, description)

badge_success = """
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="124" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="124" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h71v20H0z"/><path fill="#4c1" d="M71 0h53v20H71z"/><path fill="url(#b)" d="M0 0h124v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="365" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">__NAME__</text><text x="365" y="140" transform="scale(.1)" textLength="610">__NAME__</text><text x="965" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">success</text><text x="965" y="140" transform="scale(.1)" textLength="430">success</text></g></svg>
"""

badge_failure = """
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="124" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="124" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h71v20H0z"/><path fill="#e05d44" d="M71 0h53v20H71z"/><path fill="url(#b)" d="M0 0h124v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="365" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">__NAME__</text><text x="365" y="140" transform="scale(.1)" textLength="610">__NAME__</text><text x="965" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="430">failure</text><text x="965" y="140" transform="scale(.1)" textLength="430">failure</text></g></svg>
"""

badge_unknown = """
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="132" height="20"><linearGradient id="b" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="a"><rect width="132" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#a)"><path fill="#555" d="M0 0h71v20H0z"/><path fill="#9f9f9f" d="M71 0h61v20H71z"/><path fill="url(#b)" d="M0 0h132v20H0z"/></g><g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="110"> <text x="365" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">__NAME__</text><text x="365" y="140" transform="scale(.1)" textLength="610">__NAME__</text><text x="1005" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">unknown</text><text x="1005" y="140" transform="scale(.1)" textLength="510">unknown</text></g> </svg>
"""

def svg_page(jobs):
    name = request.args.get("name",
            default=cfg("sr.ht", "site-name"))
    job = (get_jobs(jobs, None)
        .filter(Job.status.in_([
            JobStatus.success,
            JobStatus.failed,
            JobStatus.timeout]))
        .first())
    if not job:
        badge = badge_unknown.replace("__NAME__", escape(name))
    elif job.status == JobStatus.success:
        badge = badge_success.replace("__NAME__", escape(name))
    else:
        badge = badge_failure.replace("__NAME__", escape(name))
    return badge

@jobs.route("/")
def index():
    if not current_user:
        return render_template("index-logged-out.html")
    origin = cfg("builds.sr.ht", "origin")
    rss_feed = {
        "title": f"{current_user.username}'s jobs",
        "url": origin + url_for("jobs.user_rss",
                                username=current_user.username,
                                search=request.args.get("search")),
    }
    return jobs_page(
            Job.query.filter(Job.owner_id == current_user.id),
            "index.html", rss_feed=rss_feed)


@jobs.route("/submit")
@loginrequired
def submit_GET():
    if request.args.get("manifest"):
        manifest = request.args.get("manifest")
    else:
        manifest = session.pop("manifest", default=None)
    if request.args.get("note"):
        note = request.args.get("note")
    else:
        note = session.pop("note", default=None)
    note_rows = len(note.splitlines()) if isinstance(note, str) else 1
    status = 200
    payment_required = requires_payment(current_user)
    if payment_required:
        status = 402
    return render_template("submit.html",
            manifest=manifest,
            note=note,
            note_rows=note_rows,
            payment_required=payment_required), status

def addsuffix(note: str, suffix: str) -> str:
    """
    Given a note and a suffix, return the note with the suffix concatenated.

    The returned string is guaranteed to fit in the Job.note DB field.
    """
    maxlen = Job.note.prop.columns[0].type.length
    assert len(suffix) + 1 <= maxlen, f"Suffix was too long ({len(suffix)})"
    if note.endswith(suffix) or not note:
        return note
    result = f"{note} {suffix}"
    if len(result) <= maxlen:
        return result
    note = textwrap.shorten(note, maxlen - len(suffix) - 1, placeholder="…")
    return f"{note} {suffix}"

@jobs.route("/resubmit/<int:job_id>")
@loginrequired
def resubmit_GET(job_id):
    job = Job.query.filter(Job.id == job_id).one_or_none()
    if not job:
        abort(404)
    session["manifest"] = job.manifest
    if isinstance(job.note, str) and len(job.note.splitlines()) == 1:
        note = addsuffix(job.note, "(resubmitted)")
    else:
        note = job.note
    session["note"] = note
    return redirect("/submit")

@jobs.route("/submit", methods=["POST"])
@loginrequired
def submit_POST():
    valid = Validation(request)
    _manifest = valid.require("manifest", friendly_name="Manifest")
    max_len = Job.manifest.prop.columns[0].type.length
    note = valid.optional("note", default="Submitted on the web")
    valid.expect(not _manifest or len(_manifest) < max_len,
            "Manifest must be less than {} bytes".format(max_len),
            field="manifest")
    visibility = valid.require("visibility")
    payment_required = requires_payment(current_user)
    valid.expect(not payment_required,
            "A paid account is required to submit new jobs")
    if not valid.ok:
        return render_template("submit.html", **valid.kwargs)
    try:
        manifest = Manifest(yaml.safe_load(_manifest))
    except Exception as ex:
        valid.error(str(ex), field="manifest")
        return render_template("submit.html", **valid.kwargs)
    job_id = submit_build(current_user, _manifest, note=note,
            visibility=visibility)
    return redirect("/~" + current_user.username + "/job/" + str(job_id))

@jobs.route("/cancel/<int:job_id>", methods=["POST"])
@loginrequired
def cancel(job_id):
    job = Job.query.filter(Job.id == job_id).one_or_none()
    if not job:
        abort(404)
    if job.owner_id != current_user.id and current_user.user_type != UserType.admin:
        abort(401)
    requests_session.post(f"http://{job.runner}/job/{job.id}/cancel")
    return redirect("/~" + current_user.username + "/job/" + str(job.id))

@jobs.route("/~<username>")
def user(username):
    user = User.query.filter(User.username == username).first()
    if not user:
        abort(404)
    jobs = Job.query.filter(Job.owner_id == user.id)
    if not current_user or user.id != current_user.id:
        jobs = jobs.filter(Job.visibility == Visibility.PUBLIC)
    origin = cfg("builds.sr.ht", "origin")
    rss_feed = {
        "title": f"{user.username}'s jobs",
        "url": origin + url_for("jobs.user_rss", username=username,
                                search=request.args.get("search")),
    }
    return jobs_page(jobs, user=user, breadcrumbs=[
        { "name": "~" + user.username, "link": "" }
    ], rss_feed=rss_feed)

@jobs.route("/~<username>/rss.xml")
def user_rss(username):
    user = User.query.filter(User.username == username).first()
    if not user:
        abort(404)
    jobs = Job.query.filter(Job.owner_id == user.id)
    if not current_user or user.id != current_user.id:
        jobs = jobs.filter(Job.visibility == Visibility.PUBLIC)
    return jobs_feed(jobs, f"{user.username}'s jobs",
                     "jobs.user", username=username)

@jobs.route("/~<username>.svg")
def user_svg(username):
    key = f"builds.sr.ht.svg.user.{username}"
    badge = redis.get(key)
    if not badge:
        user = User.query.filter(User.username == username).first()
        if not user:
            abort(404)
        jobs = Job.query.filter(Job.owner_id == user.id)
        badge = svg_page(jobs).encode()
        redis.setex(key, timedelta(seconds=30), badge)
    return Response(badge, mimetype="image/svg+xml", headers={
        "Cache-Control": "no-cache",
        "ETag": hashlib.sha1(badge).hexdigest(),
    })

@jobs.route("/~<username>/<path:path>")
def tag(username, path):
    user = User.query.filter(User.username == username).first()
    if not user:
        abort(404)
    jobs = Job.query.filter(Job.owner_id == user.id)\
        .filter(Job.tags.ilike(path + "%"))
    if not current_user or current_user.id != user.id:
        jobs = jobs.filter(Job.visibility == Visibility.PUBLIC)
    origin = cfg("builds.sr.ht", "origin")
    rss_feed = {
        "title": "/".join([f"~{user.username}"] +
                          [t["name"] for t in tags(path)]),
        "url": origin + url_for("jobs.tag_rss", username=username, path=path,
                                search=request.args.get("search")),
    }
    return jobs_page(jobs, user=user, breadcrumbs=[
        { "name": "~" + user.username, "url": "" }
    ] + tags(path), rss_feed=rss_feed)

@jobs.route("/~<username>/<path:path>/rss.xml")
def tag_rss(username, path):
    user = User.query.filter(User.username == username).first()
    if not user:
        abort(404)
    jobs = Job.query.filter(Job.owner_id == user.id)\
        .filter(Job.tags.ilike(path + "%"))
    if not current_user or current_user.id != user.id:
        jobs = jobs.filter(Job.visibility == Visibility.PUBLIC)
    base_title = "/".join([f"~{user.username}"] +
                          [t["name"] for t in tags(path)])
    return jobs_feed(jobs, base_title + " jobs",
                     "jobs.tag", username=username, path=path)

@jobs.route("/~<username>/<path:path>.svg")
def tag_svg(username, path):
    key = f"builds.sr.ht.svg.tag.{username}"
    badge = redis.get(key)
    if not badge:
        user = User.query.filter(User.username == username).first()
        if not user:
            abort(404)
        jobs = Job.query.filter(Job.owner_id == user.id)\
            .filter(Job.tags.ilike(path + "%"))
        badge = svg_page(jobs).encode()
        redis.setex(key, timedelta(seconds=30), badge)
    return Response(badge, mimetype="image/svg+xml", headers={
        "Cache-Control": "no-cache",
        "ETag": hashlib.sha1(badge).hexdigest(),
    })

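# Show at most the trailing 128 KiB of any build log inline; see logify() below.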
log_max = 131072

ansi = Ansi2HTMLConverter(scheme="mint-terminal", linkify=True)

def logify(text, task, log_url):
    text = ansi.convert(text, full=False)
    if len(text) >= log_max:
        text = text[-log_max:]
        try:
            text = text[text.index('\n')+1:]
        except ValueError:
            pass
        nlines = len(text.splitlines())
        text = (Markup('<pre>')
                + Markup('<span class="text-muted">'
                    'This is a big file! Only the last 128KiB is shown. '
                    f'<a target="_blank" href="{escape(log_url)}">'
                        'Click here to download the full log</a>.'
                    '</span>\n\n')
                + Markup(text)
                + Markup('</pre>'))
        linenos = Markup('<pre>\n\n\n')
    else:
        nlines = len(text.splitlines())
        text = (Markup('<pre>')
                + Markup(text)
                + Markup('</pre>'))
        linenos = Markup('<pre>')
    for no in range(1, nlines + 1):
        linenos += Markup(f'<a href="#{escape(task)}-{no-1}" '
                + f'id="{escape(task)}-{no-1}">{no}</a>')
        if no != nlines:
            linenos += Markup("\n")
    linenos += Markup("</pre>")
    return (Markup('<td>')
            + linenos
            + Markup('</td><td>')
            + Markup(ansi.produce_headers())
            + text
            + Markup('</td>'))

@jobs.route("/~<username>/job/<int:job_id>")
def job_by_id(username, job_id):
    user = User.query.filter(User.username == username).first()
    if not user:
        abort(404)
    job = Job.query.options(sa.orm.joinedload(Job.tasks)).get(job_id)
    if not job:
        abort(404)
    if not get_access(job):
        abort(404)
    if job.owner_id != user.id:
        abort(404)
    logs = list()
    build_user = cfg("git.sr.ht::dispatch", "/usr/bin/buildsrht-keys", "builds:builds").split(":")[0]
    final_status = [
        TaskStatus.success,
        TaskStatus.failed,
        TaskStatus.skipped,
        JobStatus.success,
        JobStatus.timeout,
        JobStatus.failed,
        JobStatus.cancelled,
    ]
    def get_log(log_url, name, status):
        cachekey = f"builds.sr.ht:logs:{log_url}"
        log = get_cache(cachekey)
        if log:
            metrics.buildsrht_logcache_hit.inc()
            log = json.loads(log)
            log["log"] = Markup(log["log"])
        if not log:
            metrics.buildsrht_logcache_miss.inc()
            try:
                r = requests_session.head(log_url,
                  headers=encrypt_request_authorization())
                cl = int(r.headers["Content-Length"])
                if cl > log_max:
                    r = requests_session.get(log_url, headers={
                        "Range": f"bytes={cl-log_max}-{cl-1}",
                        **encrypt_request_authorization(),
                    }, timeout=3)
                else:
                    r = requests_session.get(log_url, timeout=3,
                        headers=encrypt_request_authorization())
                if r.status_code >= 200 and r.status_code <= 299:
                    log = {
                        "name": name,
                        "log": logify(r.content.decode('utf-8', errors='replace'),
                            "task-" + name if name else "setup", log_url),
                        "more": True,
                    }
                else:
                    raise Exception()
            except Exception:
                log = {
                    "name": name,
                    "log": Markup('<td></td><td><pre><strong class="text-danger">'
                        f'Error fetching logs for task "{escape(name)}"</strong>'
                        '</pre></td>'),
                    "more": False,
                }
            if status in final_status:
                set_cache(cachekey, timedelta(days=2), json.dumps(log))
        logs.append(log)
        return log["more"]
    origin = cfg("builds.sr.ht", "api-origin", default=get_origin("builds.sr.ht"))
    log_url = f"{origin}/query/log/{job.id}/log"
    if get_log(log_url, None, job.status):
        for task in sorted(job.tasks, key=lambda t: t.id):
            if task.status == TaskStatus.pending:
                continue
            log_url = f"{origin}/query/log/{job.id}/{task.name}/log"
            if not get_log(log_url, task.name, task.status):
                break
    min_artifact_date = datetime.utcnow() - timedelta(days=90)
    if current_user:
        payment_required = requires_payment(current_user)
    else:
        payment_required = True
    return render_template("job.html",
            job=job, logs=logs,
            build_user=build_user,
            status_map=status_map,
            icon_map=icon_map,
            sort_tasks=lambda tasks: sorted(tasks, key=lambda t: t.id),
            min_artifact_date=min_artifact_date,
            payment_required=payment_required)

A services/builds.sr.ht/pod.yaml => services/builds.sr.ht/pod.yaml +109 -0
@@ 0,0 1,109 @@
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: buildsrht
  name: buildsrht-test
spec:
  containers:
  - args: ["web"]
    image: registry.gensokyo:5000/builds.sr.ht:latest
    name: buildsrht
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht/"
      readOnly: true
    env:
      - name: BUILDSRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.builds

---
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: buildsrht-api
  name: buildsrht-api-test
spec:
  containers:
  - args: ["api"]
    image: registry.gensokyo:5000/builds.sr.ht:latest
    name: buildsrht-api
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht/"
      readOnly: true
    env:
      - name: BUILDSRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.builds

---
apiVersion: v1
kind: Service
metadata:
  name: buildsrht
spec:
  selector:
    app: buildsrht
  ports:
    - protocol: TCP
      port: 5002
      targetPort: 5002
---
apiVersion: v1
kind: Service
metadata:
  name: buildsrht-api
spec:
  selector:
    app: buildsrht-api
  ports:
    - protocol: TCP
      port: 5102
      targetPort: 5102
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: buildsrht
spec:
  rules:
  - host: builds.sr.ht
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: buildsrht
            port:
              number: 5002
      - path: /query
        pathType: Prefix
        backend:
          service:
            name: buildsrht-api
            port:
              number: 5102

A services/builds.sr.ht/redis.yaml => services/builds.sr.ht/redis.yaml +34 -0
@@ 0,0 1,34 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: buildsrht-redis
  labels:
    app.kubernetes.io/name: buildsrht-redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: buildsrht-redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: buildsrht-redis
    spec:
      containers:
      - name: redis
        image: registry.gensokyo:5000/redis:latest
        ports:
        - containerPort: 6379

---
apiVersion: v1
kind: Service
metadata:
  name: buildsrht-redis
spec:
  selector:
    app.kubernetes.io/name: buildsrht-redis
  ports:
    - protocol: TCP
      port: 6379
      targetPort: 6379

A services/meta.sr.ht/Dockerfile => services/meta.sr.ht/Dockerfile +13 -0
@@ 0,0 1,13 @@
FROM alpine:3.17

RUN sed -i "1s/^/https:\/\/mirror.sr.ht\/alpine\/v3.17\/sr.ht\n/" /etc/apk/repositories
RUN wget -q -O /etc/apk/keys/alpine@sr.ht.rsa.pub https://mirror.sr.ht/alpine/alpine@sr.ht.rsa.pub

# TODO: starting w/ alpine 3.18, envsubst was moved to gettext-envsubst
RUN apk add --no-cache meta.sr.ht gettext

RUN mkdir /var/run/meta.sr.ht && chown meta:meta /var/run/meta.sr.ht
WORKDIR /var/run/meta.sr.ht

COPY docker-entrypoint.sh /usr/local/bin/
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]

A services/meta.sr.ht/docker-entrypoint.sh => services/meta.sr.ht/docker-entrypoint.sh +12 -0
@@ 0,0 1,12 @@
#!/bin/sh
set -e

envsubst < /etc/sr.ht/config.ini > config.ini

if [ "$1" == "web" ]; then
	exec env PORT=5000 /usr/bin/gunicorn metasrht.app:app
elif [ "$1" == "api" ]; then
	exec /usr/bin/metasrht-api
fi
echo "error: first argument must be 'web' or 'api'"
exit 1

A services/meta.sr.ht/pod.yaml => services/meta.sr.ht/pod.yaml +102 -0
@@ 0,0 1,102 @@
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: metasrht
  name: metasrht-test
spec:
  containers:
  - args: ["web"]
    image: registry.gensokyo:5000/meta.sr.ht:latest
    name: metasrht
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht/"
      readOnly: true
    env:
      - name: METASRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.meta

---
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: metasrht-api
  name: metasrht-api-test
spec:
  containers:
  - args: ["api"]
    image: registry.gensokyo:5000/meta.sr.ht:latest
    name: metasrht-api
    volumeMounts:
    - name: config
      mountPath: "/etc/sr.ht/"
      readOnly: true
    env:
      - name: METASRHT_DB_PASS
        valueFrom:
          secretKeyRef:
            name: postgres-credentials
            key: superUserPassword
  volumes:
  - name: config
    projected:
      sources:
      - secret:
          name: site-key-test
      - configMap:
          name: config.ini.meta

---
apiVersion: v1
kind: Service
metadata:
  name: metasrht
spec:
  selector:
    app: metasrht
  ports:
    - protocol: TCP
      port: 5000
      targetPort: 5000
---
apiVersion: v1
kind: Service
metadata:
  name: metasrht-api
spec:
  selector:
    app: metasrht-api
  ports:
    - protocol: TCP
      port: 5100
      targetPort: 5100
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: metasrht
spec:
  rules:
  - host: meta.sr.ht
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: metasrht
            port:
              number: 5000

A services/meta.sr.ht/redis.yaml => services/meta.sr.ht/redis.yaml +34 -0
@@ 0,0 1,34 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: metasrht-redis
  labels:
    app.kubernetes.io/name: metasrht-redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: metasrht-redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: metasrht-redis
    spec:
      containers:
      - name: redis
        image: registry.gensokyo:5000/redis:latest
        ports:
        - containerPort: 6379

---
apiVersion: v1
kind: Service
metadata:
  name: metasrht-redis
spec:
  selector:
    app.kubernetes.io/name: metasrht-redis
  ports:
    - protocol: TCP
      port: 6379
      targetPort: 6379