~mna/siberian

15ed568568f45b65cc22aa0fdd85ee34c772f296 — Martin Angers 4 years ago 33a158b
implement Bytes and Runes matchers
2 files changed, 78 insertions(+), 1 deletions(-)

M matcher.go
M matcher_test.go
M matcher.go => matcher.go +34 -1
@@ 10,7 10,7 @@ import (
)

// NOTE: Matchers can implement optional interfaces to e.g. indicate a
// friendly name to be used in messages?
// friendly name to be used in messages? Also how to create AST from this?

// Matcher defines the method to identify a match on the input bytes.
// Match must return the length of the match, which may be 0. A


@@ 98,3 98,36 @@ func EqualFold(s string) MatcherFunc {
		return -1
	}
}

// Bytes returns a matcher that matches any n bytes, provided the input
// holds at least n bytes. The match length is always n on success and
// -1 when the input is too short.
//
// Note that consuming bytes this way may end in the middle of a
// utf-8 rune.
//
// A negative n is treated as 0 (an empty match that always succeeds),
// which is the same result Runes produces for a negative count. Without
// this, n itself would be returned and callers would see a negative
// value that is neither a valid length nor the -1 "no match" sentinel.
func Bytes(n int) MatcherFunc {
	if n < 0 {
		// Normalize once, outside the closure, so every call sees a
		// valid length.
		n = 0
	}
	return func(b []byte) int {
		if len(b) < n {
			return -1
		}
		return n
	}
}

// Runes returns a matcher that succeeds when the input starts with n
// validly encoded utf-8 runes. On success the match length is the
// total number of bytes those n runes occupy; otherwise it is -1.
func Runes(n int) MatcherFunc {
	return func(b []byte) int {
		// A rune occupies at least one byte, so an input shorter than
		// n bytes can be rejected without decoding anything.
		if n > len(b) {
			return -1
		}

		consumed := 0
		for remaining := n; remaining > 0; remaining-- {
			r, width := utf8.DecodeRune(b[consumed:])
			// DecodeRune reports failure as RuneError with a width of 0
			// (empty input) or 1 (invalid encoding). A genuine U+FFFD in
			// the input is wider than that and is accepted.
			if width < 2 && r == utf8.RuneError {
				return -1
			}
			consumed += width
		}
		return consumed
	}
}

M matcher_test.go => matcher_test.go +44 -0
@@ 119,3 119,47 @@ func TestEqualFold(t *testing.T) {
		})
	}
}

func TestBytes(t *testing.T) {
	cases := []matcherTest{
		{"", -1},
		{"a", -1},
		{"ab", 2},
		{"abc", 2},
		{"•", 2},
		{"\xff\xfe", 2},
		{"👪", 2},
	}
	m := Bytes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in))
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
		})
	}
}

func TestRunes(t *testing.T) {
	cases := []matcherTest{
		{"", -1},
		{"a", -1},
		{"ab", 2},
		{"abc", 2},
		{"•", -1},
		{"•å", 5},
		{"\xff\xfe", -1},
		{"👪", -1},
		{"👪🤡", 8},
	}
	m := Runes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in))
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
		})
	}
}