~stepbrobd/tailscale

b4f46c31bbf8f079a0e617997e8b86f3c94247bd — Anton Tolchanov a month ago 532b261
wgengine/magicsock: export packet drop metric for outbound errors

This required sharing the dropped packet metric between two packages
(tstun and magicsock), so I've moved its definition to util/usermetric.

Updates tailscale/corp#22075

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
M net/tstun/wrap.go => net/tstun/wrap.go +10 -34
@@ 213,24 213,14 @@ type Wrapper struct {
}

type metrics struct {
	inboundDroppedPacketsTotal  *tsmetrics.MultiLabelMap[dropPacketLabel]
	outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
	inboundDroppedPacketsTotal  *tsmetrics.MultiLabelMap[usermetric.DropLabels]
	outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels]
}

func registerMetrics(reg *usermetric.Registry) *metrics {
	return &metrics{
		inboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
			reg,
			"tailscaled_inbound_dropped_packets_total",
			"counter",
			"Counts the number of dropped packets received by the node from other peers",
		),
		outboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
			reg,
			"tailscaled_outbound_dropped_packets_total",
			"counter",
			"Counts the number of packets dropped while being sent to other peers",
		),
		inboundDroppedPacketsTotal:  reg.DroppedPacketsInbound(),
		outboundDroppedPacketsTotal: reg.DroppedPacketsOutbound(),
	}
}



@@ 886,8 876,8 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf

	if filt.RunOut(p, t.filterFlags) != filter.Accept {
		metricPacketOutDropFilter.Add(1)
		t.metrics.outboundDroppedPacketsTotal.Add(dropPacketLabel{
			Reason: DropReasonACL,
		t.metrics.outboundDroppedPacketsTotal.Add(usermetric.DropLabels{
			Reason: usermetric.ReasonACL,
		}, 1)
		return filter.Drop, gro
	}


@@ 1158,8 1148,8 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca

	if outcome != filter.Accept {
		metricPacketInDropFilter.Add(1)
		t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
			Reason: DropReasonACL,
		t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
			Reason: usermetric.ReasonACL,
		}, 1)

		// Tell them, via TSMP, we're dropping them due to the ACL.


@@ 1239,8 1229,8 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) {
		t.noteActivity()
		_, err := t.tdevWrite(buffs, offset)
		if err != nil {
			t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
				Reason: DropReasonError,
			t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
				Reason: usermetric.ReasonError,
			}, int64(len(buffs)))
		}
		return len(buffs), err


@@ 1482,20 1472,6 @@ var (
	metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco")
)

type DropReason string

const (
	DropReasonACL   DropReason = "acl"
	DropReasonError DropReason = "error"
)

type dropPacketLabel struct {
	// Reason indicates what we have done with the packet, and has the following values:
	// - acl (rejected packets because of ACL)
	// - error (rejected packets because of an error)
	Reason DropReason
}

func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
	t.captureHook.Store(cb)
}

M net/tstun/wrap_test.go => net/tstun/wrap_test.go +3 -3
@@ 441,13 441,13 @@ func TestFilter(t *testing.T) {
	}

	var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64
	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
		metricInboundDroppedPacketsACL = m.Value()
	}
	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}).(*expvar.Int); ok {
	if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonError}).(*expvar.Int); ok {
		metricInboundDroppedPacketsErr = m.Value()
	}
	if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
	if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
		metricOutboundDroppedPacketsACL = m.Value()
	}


A util/usermetric/metrics.go => util/usermetric/metrics.go +69 -0
@@ 0,0 1,69 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

// This file contains user-facing metrics that are used by multiple packages.
// Use it to define more common metrics. Any changes to the registry and
// metric types should be in usermetric.go.

package usermetric

import (
	"sync"

	"tailscale.com/metrics"
)

// Metrics contains user-facing metrics that are used by multiple packages.
type Metrics struct {
	initOnce sync.Once

	droppedPacketsInbound  *metrics.MultiLabelMap[DropLabels]
	droppedPacketsOutbound *metrics.MultiLabelMap[DropLabels]
}

// DropReason is the reason why a packet was dropped.
type DropReason string

const (
	// ReasonACL means that the packet was not permitted by ACL.
	ReasonACL DropReason = "acl"

	// ReasonError means that the packet was dropped because of an error.
	ReasonError DropReason = "error"
)

// DropLabels contains common label(s) for dropped packet counters.
type DropLabels struct {
	Reason DropReason
}

// initOnce initializes the common metrics.
func (r *Registry) initOnce() {
	r.m.initOnce.Do(func() {
		r.m.droppedPacketsInbound = NewMultiLabelMapWithRegistry[DropLabels](
			r,
			"tailscaled_inbound_dropped_packets_total",
			"counter",
			"Counts the number of dropped packets received by the node from other peers",
		)
		r.m.droppedPacketsOutbound = NewMultiLabelMapWithRegistry[DropLabels](
			r,
			"tailscaled_outbound_dropped_packets_total",
			"counter",
			"Counts the number of packets dropped while being sent to other peers",
		)
	})
}

// DroppedPacketsOutbound returns the outbound dropped packet metric, creating it
// if necessary.
func (r *Registry) DroppedPacketsOutbound() *metrics.MultiLabelMap[DropLabels] {
	r.initOnce()
	return r.m.droppedPacketsOutbound
}

// DroppedPacketsInbound returns the inbound dropped packet metric.
func (r *Registry) DroppedPacketsInbound() *metrics.MultiLabelMap[DropLabels] {
	r.initOnce()
	return r.m.droppedPacketsInbound
}

M util/usermetric/usermetric.go => util/usermetric/usermetric.go +3 -0
@@ 19,6 19,9 @@ import (
// Registry tracks user-facing metrics of various Tailscale subsystems.
type Registry struct {
	vars expvar.Map

	// m contains common metrics owned by the registry.
	m Metrics
}

// NewMultiLabelMapWithRegistry creates and register a new

M wgengine/magicsock/derp.go => wgengine/magicsock/derp.go +3 -0
@@ 674,6 674,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
			if err != nil {
				c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
				metricSendDERPError.Add(1)
				if !wr.isDisco {
					c.metrics.outboundPacketsDroppedErrors.Add(1)
				}
			} else if !wr.isDisco {
				c.metrics.outboundPacketsDERPTotal.Add(1)
				c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))

M wgengine/magicsock/magicsock.go => wgengine/magicsock/magicsock.go +14 -1
@@ 127,6 127,10 @@ type metrics struct {
	outboundBytesIPv4Total expvar.Int
	outboundBytesIPv6Total expvar.Int
	outboundBytesDERPTotal expvar.Int

	// outboundPacketsDroppedErrors is the total number of outbound packets
	// dropped due to errors.
	outboundPacketsDroppedErrors expvar.Int
}

// A Conn routes UDP packets and actively manages a list of its endpoints.


@@ 605,6 609,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
		"counter",
		"Counts the number of bytes sent to other peers",
	)
	outboundPacketsDroppedErrors := reg.DroppedPacketsOutbound()

	m := new(metrics)

	// Map clientmetrics to the usermetric counters.


@@ 631,6 637,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
	outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
	outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)

	outboundPacketsDroppedErrors.Set(usermetric.DropLabels{Reason: usermetric.ReasonError}, &m.outboundPacketsDroppedErrors)

	return m
}



@@ 1202,8 1210,13 @@ func (c *Conn) networkDown() bool { return !c.networkUp.Load() }
// Send implements conn.Bind.
//
// See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) error {
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) (err error) {
	n := int64(len(buffs))
	defer func() {
		if err != nil {
			c.metrics.outboundPacketsDroppedErrors.Add(n)
		}
	}()
	metricSendData.Add(n)
	if c.networkDown() {
		metricSendDataNetworkDown.Add(n)

M wgengine/magicsock/magicsock_test.go => wgengine/magicsock/magicsock_test.go +25 -0
@@ 63,6 63,7 @@ import (
	"tailscale.com/types/nettype"
	"tailscale.com/types/ptr"
	"tailscale.com/util/cibuild"
	"tailscale.com/util/must"
	"tailscale.com/util/racebuild"
	"tailscale.com/util/set"
	"tailscale.com/util/usermetric"


@@ 3083,3 3084,27 @@ func TestMaybeRebindOnError(t *testing.T) {
		}
	})
}

func TestNetworkDownSendErrors(t *testing.T) {
	netMon := must.Get(netmon.New(t.Logf))
	defer netMon.Close()

	reg := new(usermetric.Registry)
	conn := must.Get(NewConn(Options{
		DisablePortMapper: true,
		Logf:              t.Logf,
		NetMon:            netMon,
		Metrics:           reg,
	}))
	defer conn.Close()

	conn.SetNetworkUp(false)
	if err := conn.Send([][]byte{{00}}, &lazyEndpoint{}); err == nil {
		t.Error("expected error, got nil")
	}
	resp := httptest.NewRecorder()
	reg.Handler(resp, new(http.Request))
	if !strings.Contains(resp.Body.String(), `tailscaled_outbound_dropped_packets_total{reason="error"} 1`) {
		t.Errorf("expected NetworkDown to increment packet dropped metric; got %q", resp.Body.String())
	}
}