~egtann/srp

f05abf17b8271e55d18eb9c8b7c1f81659d97054 — Evan Tann 1 year, 4 months ago e5fb8b4
add live backend api
6 files changed, 283 insertions(+), 58 deletions(-)

M README.md
A api.go
M cmd/srp/main.go
M go.mod
M go.sum
M proxy.go
M README.md => README.md +57 -14
@@ 11,6 11,7 @@ whole lot more.
* Automate HTTPS with TLS termination
* Load balance using a simple algorithm
* Check health automatically
* API to retrieve healthy services
* Live reloaded config file

And nothing else.


@@ 25,19 26,27 @@ Then run `srp -h` for usage help.

## Config file format

The config file maps requests to backend services.
The config file has two main parts:

1. Services maps requests to backend services.
1. API restricts access via an IP subnet.

```json
{
	"127.0.0.1:3000": {
		"HealthPath": "/health",
		"Backends": [
			"127.0.0.1:3001",
			"127.0.0.1:3002",
			"127.0.0.1:3003",
			"127.0.0.1:3004",
			"127.0.0.1:3005"
		]
	"Services": {
		"127.0.0.1:3000": {
			"HealthPath": "/health",
			"Backends": [
				"127.0.0.1:3001",
				"127.0.0.1:3002",
				"127.0.0.1:3003",
				"127.0.0.1:3004",
				"127.0.0.1:3005"
			]
		}
	},
	"API": {
		"Subnet": "10.0.0.0/24"
	}
}
```


@@ 49,12 58,46 @@ your servers every few seconds and stop sending requests to any that fail until
the health checks start succeeding. Additionally, if any single request fails,
SRP will try that same request again using a different backend (3 tries max).

## API

SRP includes a simple API to retrieve each services' healthy backends. Combined
with something like github.com/egtann/lanhttp, the API enables your apps to
communicate over an internal network, rather than through the public internet,
without re-configuring your servers or modifying DNS.

By default the API is disabled. When configured with `Subnet`, the API responds
to `/services` over the appropriate subnet with JSON resembling the following:

```
{
	"www.example.internal": {
		"HealthPath": "/health",
		"Backends": [
			"10.0.0.1:3000",
			"10.0.0.2:3000"
		]
	}
}
```

Only the healthy IPs will be returned in the API.

lanhttp or similar can help you periodically call this API to update healthy
IPs and route *.internal traffic directly to the live IPs, skipping SRP
entirely, to keep chatty internal networks from impacting the performance of
SRP.

## Why build SRP?

HAProxy, Nginx, Apache, etc. don't do automatic HTTPS. They're also very
complex, which is far beyond the need of most projects. Several new Go-based
reverse proxies that use autocert, such as Traefik and Caddy, are very large
and complex as well, with plenty of opportunity for bugs.
Complexity doesn't belong in the infrastructure layer. When something goes
wrong at this level, it can be catastrophic. You need to diagnose the issue
quickly and deliver a fix in minutes to meet your SLAs. A small, simple and
well-tested codebase is the only way to achieve that.

HAProxy, Nginx, and Apache are very complex, which is far beyond the need of
most projects. Several new Go-based reverse proxies that use autocert, such as
Traefik and Caddy, are very large and complex as well, with plenty of
opportunity for bugs.

Instead, SRP keeps it simple. There's a much smaller surface for bugs. It's
easier and faster to debug if issues occur (especially nice when they occur in

A api.go => api.go +116 -0
@@ 0,0 1,116 @@
package srp

import (
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"net"
	"net/http"
	"strconv"
	"strings"
)

// RedirectHTTPHandler redirects http requests to use the API if the request
// originated from the whitelisted subnet. In all other GET and HEAD requests,
// this handler redirects to HTTPS. For POST, PUT, etc. this handler throws an
// error letting the client know to use HTTPS.
func (rp *ReverseProxy) RedirectHTTPHandler() (http.Handler, error) {
	fn := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case "GET", "HEAD": // Do nothing
		default:
			http.Error(w, "Use HTTPS", http.StatusBadRequest)
			return
		}
		target := "https://" + stripPort(r.Host) + r.URL.RequestURI()
		http.Redirect(w, r, target, http.StatusFound)
	})
	if rp.reg.API.Subnet == "" {
		return fn, nil
	}
	if localIP := getLocalIP(); localIP == "" {
		return fn, nil
	}
	maskedIP, mask, err := maskIP(rp.reg.API.Subnet)
	if err != nil {
		return nil, fmt.Errorf("mask: %w", err)
	}
	fn = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case "GET", "HEAD": // Do nothing
		default:
			http.Error(w, "Use HTTPS", http.StatusBadRequest)
			return
		}
		host, _, err := net.SplitHostPort(r.RemoteAddr)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		if maskedIP == net.ParseIP(host).Mask(mask).String() {
			if strings.TrimPrefix(r.URL.Path, "/") == "services" {
				data := map[string][]string{}
				reg := rp.cloneRegistry()
				for host, srv := range reg.Services {
					data[host] = srv.liveBackends
				}
				err := json.NewEncoder(w).Encode(data)
				if err != nil {
					log.Printf("failed to encode registry: %s", err)
				}
				return
			}
		}
		target := "https://" + stripPort(r.Host) + r.URL.RequestURI()
		http.Redirect(w, r, target, http.StatusFound)
	})
	return fn, nil
}

func stripPort(hostport string) string {
	host, _, err := net.SplitHostPort(hostport)
	if err != nil {
		return hostport
	}
	return net.JoinHostPort(host, "443")
}

// getLocalIP returns the non-loopback local IP of the host.
//
// This is taken from https://stackoverflow.com/a/31551220
func getLocalIP() string {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return ""
	}
	for _, address := range addrs {
		// check the address type and if it is not a loopback the
		// display it
		ipnet, ok := address.(*net.IPNet)
		if ok && !ipnet.IP.IsLoopback() {
			if ipnet.IP.To4() != nil {
				return ipnet.IP.String()
			}
		}
	}
	return ""
}

func maskIP(subnet string) (string, net.IPMask, error) {
	parts := strings.SplitN(subnet, "/", 2)
	if len(parts) != 2 {
		return "", nil, errors.New("bad subnet: expected ip/mask in the form of 10.1.2.0/24")
	}
	maskBits, err := strconv.Atoi(parts[1])
	if err != nil {
		return "", nil, fmt.Errorf("bad mask: %w", err)
	}
	mask := net.CIDRMask(maskBits, 32)
	ip := net.ParseIP(parts[0])
	maskedIP := ip.Mask(mask).String()
	if maskedIP == "<nil>" {
		return "", nil, fmt.Errorf("bad masked ip: %s", parts[0])
	}
	return maskedIP, mask, nil
}

M cmd/srp/main.go => cmd/srp/main.go +12 -5
@@ 57,7 57,11 @@ func main() {
	rand.Seed(time.Now().UnixNano())
	proxy := srp.NewProxy(&logger{}, reg)

	// Set up the API only if Subnet is configured and the internal
	// IP of the SRP server can be determined.
	srv := &http.Server{
		// TODO(egtann) wrap proxy to allow API requests over the
		// whitelisted subnet
		Handler:        proxy,
		ReadTimeout:    timeout,
		WriteTimeout:   timeout,


@@ 79,14 83,18 @@ func main() {
			return cert, err
		}
		srv.TLSConfig = &tls.Config{GetCertificate: getCert}
		apiHandler, err := proxy.RedirectHTTPHandler()
		if err != nil {
			log.Fatal(err)
		}
		go func() {
			err = http.ListenAndServe(":http", m.HTTPHandler(nil))
			err = http.ListenAndServe(":80", m.HTTPHandler(apiHandler))
			if err != nil {
				log.Fatal(fmt.Printf("listen and serve: %s", err))
			}
		}()
		port = "443"
		srv.Addr = ":https"
		srv.Addr = ":443"
		go func() {
			log.Println("serving tls")
			if err = srv.ListenAndServeTLS("", ""); err != nil {


@@ 126,11 134,9 @@ func (l *logger) ReqPrintf(reqID, format string, vals ...interface{}) {
// check when the reloaded channel receives a message, so a new health check
// with the new registry can be started.
func checkHealth(proxy *srp.ReverseProxy, sighupCh <-chan bool) {
	ticker := time.NewTicker(3 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
		case <-time.After(3 * time.Second):
			err := proxy.CheckHealth()
			if err != nil {
				log.Println("check health", err)


@@ 152,6 158,7 @@ func hotReloadConfig(
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGHUP)
	<-stop

	log.Println("reloading config...")
	reg, err := srp.NewRegistry(filename)
	if err != nil {

M go.mod => go.mod +5 -0
@@ 3,7 3,12 @@ module github.com/egtann/srp
go 1.13

require (
	github.com/egtann/observer v0.0.0-20190205194124-df8d12117b15 // indirect
	github.com/egtann/sjs v0.0.0-20190419155825-aa595c9effd7 // indirect
	github.com/egtann/sls v0.0.0-20190405184350-3bf86d71394b // indirect
	github.com/egtann/up v0.0.0-20190510172642-fc5bc4be8665 // indirect
	github.com/hashicorp/go-cleanhttp v0.5.1
	github.com/microcosm-cc/bluemonday v1.0.2 // indirect
	github.com/rs/xid v1.2.1
	golang.org/x/crypto v0.0.0-20190907121410-71b5226ff739
	golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 // indirect

M go.sum => go.sum +25 -0
@@ 1,10 1,35 @@
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
github.com/egtann/component v0.0.0-20181105184933-8f39bcdff390/go.mod h1:hkWT9MYe8KTv4CKKrYnDnscyfamiLK3VAPaVm3IJs5o=
github.com/egtann/observer v0.0.0-20190205194124-df8d12117b15 h1:2Ob+8rTu4+s3+9DvE6a6eLa5rco0fxjyvhXyv3gD93k=
github.com/egtann/observer v0.0.0-20190205194124-df8d12117b15/go.mod h1:95qj+w3rm9HzEgsuw3ZUVRr5ZeO+KqkmIpEtaJOom9U=
github.com/egtann/sjs v0.0.0-20190419155825-aa595c9effd7 h1:K9JeELogscOFLZ42/Wrcu2hwRE7MR0dYt4Q5Xzf38QI=
github.com/egtann/sjs v0.0.0-20190419155825-aa595c9effd7/go.mod h1:HiYt/wKQrvSw7qmzFysBxZi1V6RhGhi3EHq0mEmCSVI=
github.com/egtann/sls v0.0.0-20190405184350-3bf86d71394b h1:NJAXXMwDj8edpu8AEzkRtouwsXetcgrrKIswYyU50UM=
github.com/egtann/sls v0.0.0-20190405184350-3bf86d71394b/go.mod h1:yLnSPTTy973+2i9AVwqZI//8GHPs3C6GN+Mr42caIpY=
github.com/egtann/up v0.0.0-20181230025019-46ea6185e2ee/go.mod h1:BvZJThG7x7BtdUvj/vwdW8fJFj1ZDrEwv0Tz9vLNOZo=
github.com/egtann/up v0.0.0-20190510172642-fc5bc4be8665 h1:FuK1sPzXUH9tHvZnvR3yytA2gEG3clF6LjTPmkJr6Fs=
github.com/egtann/up v0.0.0-20190510172642-fc5bc4be8665/go.mod h1:BvZJThG7x7BtdUvj/vwdW8fJFj1ZDrEwv0Tz9vLNOZo=
github.com/go-chi/chi v4.0.1+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/justinas/alice v0.0.0-20171023064455-03f45bd4b7da h1:5y58+OCjoHCYB8182mpf/dEsq0vwTKPOo4zGfH0xW9A=
github.com/justinas/alice v0.0.0-20171023064455-03f45bd4b7da/go.mod h1:oLH0CmIaxCGXD67VKGR5AacGXZSMznlmeqM8RzPrcY8=
github.com/microcosm-cc/bluemonday v1.0.2 h1:5lPfLTTAvAbtS0VqT+94yOtFnGfUWYyx0+iToC3Os3s=
github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1-0.20181023235946-059132a15dd0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/rs/zerolog v1.11.0 h1:DRuq/S+4k52uJzBQciUcofXx45GrMC6yrEbb/CoK6+M=
github.com/rs/zerolog v1.11.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190907121410-71b5226ff739 h1:Gc7JIyxvWgD6m+QmVryY0MstDORNYididDGxgZ6Tnpk=
golang.org/x/crypto v0.0.0-20190907121410-71b5226ff739/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 h1:k7pJ2yAPLPgbskkFdhRCsA77k2fySZ1zf2zCjvQCiIM=
golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=

M proxy.go => proxy.go +68 -39
@@ 26,7 26,7 @@ import (
// checks pass.
type ReverseProxy struct {
	rp       httputil.ReverseProxy
	reg      Registry
	reg      *Registry
	jobCh    chan *healthCheck
	resultCh chan *healthCheck
	mu       sync.RWMutex


@@ 35,7 35,12 @@ type ReverseProxy struct {

// Registry maps hosts to backends with other helpful info, such as
// healthchecks.
type Registry map[string]*backend
type Registry struct {
	Services map[string]*backend
	API      struct {
		Subnet string
	}
}

type backend struct {
	HealthPath   string


@@ 58,7 63,7 @@ type healthCheck struct {
}

// NewProxy from a given Registry.
func NewProxy(log Logger, reg Registry) *ReverseProxy {
func NewProxy(log Logger, reg *Registry) *ReverseProxy {
	director := func(req *http.Request) {
		req.URL.Scheme = "http"
		req.URL.Host = req.Host


@@ 104,20 109,38 @@ func NewProxy(log Logger, reg Registry) *ReverseProxy {
func (r *ReverseProxy) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	// Only allow GET and HEAD requests to the API
	switch req.Method {
	case "GET", "HEAD":
		if strings.TrimPrefix(req.URL.Path, "/") == "services" {
			reg := cloneRegistryNoLock(r.reg)
			for _, srv := range reg.Services {
				// Only show live backends
				srv.Backends = srv.liveBackends
			}
			err := json.NewEncoder(w).Encode(reg.Services)
			if err != nil {
				r.log.Printf("failed to encode registry: %s", err)
			}
			return
		}
	}
	r.rp.ServeHTTP(w, req)
	return
}

func newRegistry(r io.Reader) (Registry, error) {
func newRegistry(r io.Reader) (*Registry, error) {
	byt, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	reg := Registry{}
	err = json.Unmarshal(byt, &reg)
	reg := &Registry{}
	err = json.Unmarshal(byt, reg)
	if err != nil {
		return nil, fmt.Errorf("unmarshal config: %w", err)
	}
	for host, v := range reg {
	for host, v := range reg.Services {
		if len(v.Backends) == 0 {
			return nil, fmt.Errorf("missing backends for %q", host)
		}


@@ 127,7 150,7 @@ func newRegistry(r io.Reader) (Registry, error) {

// NewRegistry for a given configuration file. This reports an error if any
// frontend host has no backends.
func NewRegistry(filename string) (Registry, error) {
func NewRegistry(filename string) (*Registry, error) {
	fi, err := os.Open(filename)
	if err != nil {
		return nil, fmt.Errorf("read file %s: %w", filename, err)


@@ 136,16 159,18 @@ func NewRegistry(filename string) (Registry, error) {
	return newRegistry(fi)
}

// Hosts for the registry.
func (r Registry) Hosts() []string {
// Hosts for the registry. This automatically removes *.internal domains.
func (r *Registry) Hosts() []string {
	hosts := []string{}
	for k := range r {
		hosts = append(hosts, k)
	for k := range r.Services {
		if !strings.HasSuffix(k, ".internal") {
			hosts = append(hosts, k)
		}
	}
	return hosts
}

func (r *ReverseProxy) cloneRegistry() Registry {
func (r *ReverseProxy) cloneRegistry() *Registry {
	r.mu.RLock()
	defer r.mu.RUnlock()
	return cloneRegistryNoLock(r.reg)


@@ 154,18 179,22 @@ func (r *ReverseProxy) cloneRegistry() Registry {
// cloneRegistryNoLock returns a duplicate registry without acquiring locks.
// Only to be used on existing clones within a single thread or where locking
// is provided outside the function.
func cloneRegistryNoLock(reg Registry) Registry {
	clone := make(Registry, len(reg))
	for host, fe := range reg {
func cloneRegistryNoLock(reg *Registry) *Registry {
	clone := &Registry{
		Services: make(map[string]*backend, len(reg.Services)),
		API:      reg.API,
	}
	for host, fe := range reg.Services {
		cfe := &backend{
			HealthPath: fe.HealthPath,
			Backends:   make([]string, len(fe.Backends)),
			HealthPath:   fe.HealthPath,
			Backends:     make([]string, len(fe.Backends)),
			liveBackends: make([]string, len(fe.liveBackends)),
		}
		copy(cfe.Backends, fe.Backends)
		clone[host] = cfe
		copy(cfe.liveBackends, fe.liveBackends)
		clone.Services[host] = cfe
	}
	return clone

}

// CheckHealth of backend servers in the registry concurrently, and update the


@@ 174,7 203,7 @@ func (r *ReverseProxy) CheckHealth() error {
	checks := []*healthCheck{}
	origReg := r.cloneRegistry()
	newReg := cloneRegistryNoLock(origReg)
	for host, frontend := range newReg {
	for host, frontend := range newReg.Services {
		if frontend.HealthPath == "" {
			frontend.liveBackends = frontend.Backends
			continue


@@ 202,7 231,7 @@ func (r *ReverseProxy) CheckHealth() error {
			r.log.Printf("check health: %s failed: %s", check.ip, check.err)
			continue
		}
		host := newReg[check.host]
		host := newReg.Services[check.host]
		host.liveBackends = append(host.liveBackends, check.ip)
		r.log.Printf("check health: %s 200 OK", check.ip)
	}


@@ 219,25 248,23 @@ func (r *ReverseProxy) CheckHealth() error {
	return nil
}

func diff(reg1, reg2 Registry) bool {
	for key := range reg1 {
func diff(reg1, reg2 *Registry) bool {
	for key := range reg1.Services {
		// Exit quickly if lengths are different
		if len(reg1[key].liveBackends) != len(reg2[key].liveBackends) {
		lb1 := reg1.Services[key].liveBackends
		lb2 := reg2.Services[key].liveBackends
		if len(lb1) != len(lb2) {
			return true
		}

		// Sort our live backends to get better performance when
		// diffing the live backends.
		sort.Slice(reg1[key].liveBackends, func(i, j int) bool {
			return reg1[key].liveBackends[i] < reg1[key].liveBackends[j]
		})
		sort.Slice(reg2[key].liveBackends, func(i, j int) bool {
			return reg2[key].liveBackends[i] < reg2[key].liveBackends[j]
		})
		sort.Slice(lb1, func(i, j int) bool { return lb1[i] < lb1[j] })
		sort.Slice(lb2, func(i, j int) bool { return lb2[i] < lb2[j] })

		// Compare the two and exit on the first different string
		for i, ip := range reg1[key].liveBackends {
			if reg2[key].liveBackends[i] != ip {
		for i, ip := range lb1 {
			if lb2[i] != ip {
				return true
			}
		}


@@ 247,7 274,7 @@ func diff(reg1, reg2 Registry) bool {

// UpdateRegistry for the reverse proxy with new frontends, backends, and
// health checks.
func (r *ReverseProxy) UpdateRegistry(reg Registry) {
func (r *ReverseProxy) UpdateRegistry(reg *Registry) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.reg = reg


@@ 277,16 304,18 @@ func ping(job *healthCheck) error {
	return nil
}

func newTransport(reg Registry) http.RoundTripper {
func newTransport(reg *Registry) http.RoundTripper {
	transport := cleanhttp.DefaultTransport()
	transport.ResponseHeaderTimeout = 30 * time.Second
	transport.DialContext = func(
		ctx context.Context,
		network, addr string,
	) (net.Conn, error) {
		// Trim trailing port, if any
		addrShort := strings.SplitN(addr, ":", 2)[0]
		host, ok := reg[addrShort]
		ip, _, err := net.SplitHostPort(addr)
		if err != nil {
			return nil, fmt.Errorf("split host port: %w", err)
		}
		host, ok := reg.Services[ip]
		if !ok {
			return nil, fmt.Errorf("no host for %s", addr)
		}


@@ 305,7 334,7 @@ func retryDial(network string, endpoints []string, tries int) (net.Conn, error) 
	for i := 0; i < min(tries, len(endpoints)); i++ {
		var conn net.Conn
		endpoint := endpoints[(randInt+i)%len(endpoints)]
		conn, err = net.Dial(network, endpoint+":80")
		conn, err = net.Dial(network, net.JoinHostPort(endpoint, "80"))
		if err == nil {
			return conn, nil
		}