~mna/siberian

db08d8194cd1b364298834aba295d9d169c72718 — Martin Angers 4 years ago 9587361
change Matcher interface to take start position
5 files changed, 44 insertions(+), 39 deletions(-)

M ebnf.go
M ebnf_test.go
M grammar_test.go
M matcher.go
M matcher_test.go
M ebnf.go => ebnf.go +6 -8
@@ 10,9 10,9 @@ type Alt struct {
}

// Match implements Matcher for Alt.
func (a *Alt) Match(b []byte) int {
func (a *Alt) Match(b []byte, p int) int {
	for _, m := range a.Ms {
		if n := m.Match(b); n >= 0 {
		if n := m.Match(b, p); n >= 0 {
			return n
		}
	}


@@ 71,7 71,7 @@ type Repeat struct {
}

// Match implements Matcher for Repeat.
func (r *Repeat) Match(b []byte) int {
func (r *Repeat) Match(b []byte, p int) int {
	// handle impossible cases
	if r.Min < 0 || (r.Max >= 0 && r.Max < r.Min) {
		panic(fmt.Sprintf("invalid Repeat limits: min=%d; max=%d", r.Min, r.Max))


@@ 83,7 83,7 @@ func (r *Repeat) Match(b []byte) int {
			return n
		}

		nn := r.M.Match(b)
		nn := r.M.Match(b, p+n)
		if nn < 0 {
			if count >= r.Min {
				return n


@@ 92,7 92,6 @@ func (r *Repeat) Match(b []byte) int {
		}
		n += nn
		count++
		b = b[nn:]
	}
}



@@ 103,14 102,13 @@ type Seq struct {
}

// Match implements Matcher for Seq.
func (s *Seq) Match(b []byte) int {
func (s *Seq) Match(b []byte, p int) int {
	var n int
	for _, m := range s.Ms {
		nn := m.Match(b)
		nn := m.Match(b, p+n)
		if nn < 0 {
			return nn
		}
		b = b[nn:]
		n += nn
	}
	return n

M ebnf_test.go => ebnf_test.go +6 -6
@@ 25,7 25,7 @@ func TestAlt(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := alt.Match([]byte(c.in))
			n := alt.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 46,7 46,7 @@ func TestRepeat_ZeroOrOne(t *testing.T) {
	rep := ZeroOrOne(Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in))
			n := rep.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 67,7 67,7 @@ func TestRepeat_OneOrMore(t *testing.T) {
	rep := OneOrMore(Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in))
			n := rep.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 88,7 88,7 @@ func TestRepeat_ZeroOrMore(t *testing.T) {
	rep := ZeroOrMore(Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in))
			n := rep.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 114,7 114,7 @@ func TestRepeat_TwoOrThree(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in))
			n := rep.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 143,7 143,7 @@ func TestSeq(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := seq.Match([]byte(c.in))
			n := seq.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}

M grammar_test.go => grammar_test.go +1 -1
@@ 34,7 34,7 @@ func TestGrammar_AB(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := g.Match([]byte(c.in))
			n := g.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}

M matcher.go => matcher.go +23 -16
@@ 9,29 9,29 @@ import (
	"unicode/utf8"
)

// NOTE: Matchers can implement optional interfaces to e.g. indicate a
// friendly name to be used in messages? Also how to create AST from this?

// Matcher defines the method to identify a match on the input bytes.
// Match must return the length of the match, which may be 0. A
// negative value must be returned to indicate no match.
// negative value must be returned to indicate no match. The b slice
// is the full input source, and p is the start position, so typically
// a matcher should try to match at b[p:].
type Matcher interface {
	Match(b []byte) int
	Match(b []byte, p int) int
}

// MatcherFunc is a function type that implements Matcher by calling
// itself.
type MatcherFunc func([]byte) int
type MatcherFunc func([]byte, int) int

// Match implements Matcher for MatcherFunc by calling itself.
func (f MatcherFunc) Match(b []byte) int {
	return f(b)
func (f MatcherFunc) Match(b []byte, p int) int {
	return f(b, p)
}

// ASCII returns a Matcher that matches if the next byte is in the provided
// ASCII table of allowed bytes.
func ASCII(tbl *ASCIITable) MatcherFunc {
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if len(b) == 0 {
			return -1
		}


@@ 46,7 46,8 @@ func ASCII(tbl *ASCIITable) MatcherFunc {
// range table. The package golang.org/x/text/unicode/rangetable can be
// used to construct a range table.
func Unicode(rt *unicode.RangeTable) MatcherFunc {
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		r, sz := utf8.DecodeRune(b)
		if r == utf8.RuneError && sz < 2 {
			return -1


@@ 65,7 66,8 @@ func Regexp(re *regexp.Regexp) MatcherFunc {
	if !strings.HasPrefix(re.String(), "^") {
		panic(fmt.Sprintf("regular expression %q must be anchored to start of input", re.String()))
	}
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if ixs := re.FindIndex(b); ixs != nil {
			return ixs[1]
		}


@@ 76,7 78,8 @@ func Regexp(re *regexp.Regexp) MatcherFunc {
// Equal returns a Matcher that matches s exactly.
func Equal(s string) MatcherFunc {
	sb := []byte(s)
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if bytes.HasPrefix(b, sb) {
			return len(sb)
		}


@@ 88,7 91,8 @@ func Equal(s string) MatcherFunc {
// (case-insensitive).
func EqualFold(s string) MatcherFunc {
	sb := []byte(s)
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if len(b) < len(sb) {
			return -1
		}


@@ 103,7 107,8 @@ func EqualFold(s string) MatcherFunc {
// Note that consuming bytes this way may end in the middle of a
// utf-8 rune.
func Bytes(n int) MatcherFunc {
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if len(b) < n {
			return -1
		}


@@ 114,7 119,8 @@ func Bytes(n int) MatcherFunc {
// Runes matches any n runes if the input has at least n valid
// runes.
func Runes(n int) MatcherFunc {
	return func(b []byte) int {
	return func(b []byte, p int) int {
		b = b[p:]
		if len(b) < n {
			// not even n bytes in the input, cannot succeed
			return -1


@@ 135,7 141,8 @@ func Runes(n int) MatcherFunc {
// EOF is a MatcherFunc that matches if the input is at EOF.
var EOF = MatcherFunc(eof)

func eof(b []byte) int {
func eof(b []byte, p int) int {
	b = b[p:]
	if len(b) == 0 {
		return 0
	}

M matcher_test.go => matcher_test.go +8 -8
@@ 26,7 26,7 @@ func TestASCII(t *testing.T) {
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := ASCII(&at)
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 46,7 46,7 @@ func TestUnicode(t *testing.T) {
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Unicode(unicode.Ll) // letter, lowercase
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 67,7 67,7 @@ func TestRegexp(t *testing.T) {
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Regexp(re)
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 89,7 89,7 @@ func TestEqual(t *testing.T) {
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Equal("abc")
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 112,7 112,7 @@ func TestEqualFold(t *testing.T) {
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := EqualFold("abc")
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 133,7 133,7 @@ func TestBytes(t *testing.T) {
	m := Bytes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 156,7 156,7 @@ func TestRunes(t *testing.T) {
	m := Runes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}


@@ 173,7 173,7 @@ func TestEOF(t *testing.T) {
	var m Matcher = EOF
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in))
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}