~mna/siberian

2b5a82ec88931a3ab93b715c763c474ca683a392 — Martin Angers 2 years ago a645b2d
figure out how to do the visit matcher, test And and Not
7 files changed, 161 insertions(+), 59 deletions(-)

M ebnf.go
M ebnf_test.go
M internal/jsonmatcher/jsonmatcher.go
M matcher.go
M matcher_test.go
M parse.go
M parse_test.go
M ebnf.go => ebnf.go +6 -6
@@ 20,9 20,9 @@ func (a *Alt) Match(b []byte, p int) int {
}

// Visit implements MatcherVisitor for Alt. Each alternative in a.Ms is
// visited in order and replaced in place by the Matcher the visit returns.
func (a *Alt) Visit(fn func(m Matcher) Matcher) {
func (a *Alt) Visit(vc *VisitContext, fn func(Matcher) Matcher) {
	for i, m := range a.Ms {
		a.Ms[i] = VisitMatcher(m, fn)
		a.Ms[i] = vc.Visit(m, fn)
	}
}



@@ 125,8 125,8 @@ func (r *Repeat) Match(b []byte, p int) int {
}

// Visit implements MatcherVisitor for Repeat. The repeated Matcher r.M is
// visited and replaced by the Matcher the visit returns.
func (r *Repeat) Visit(fn func(Matcher) Matcher) {
	r.M = VisitMatcher(r.M, fn)
func (r *Repeat) Visit(vc *VisitContext, fn func(Matcher) Matcher) {
	r.M = vc.Visit(r.M, fn)
}

// Seq is a Matcher that matches if the Ms all match in sequence.


@@ 149,8 149,8 @@ func (s *Seq) Match(b []byte, p int) int {
}

// Visit implements MatcherVisitor for Seq. Each Matcher of the sequence in
// s.Ms is visited in order and replaced in place by the Matcher the visit
// returns.
func (s *Seq) Visit(fn func(Matcher) Matcher) {
func (s *Seq) Visit(vc *VisitContext, fn func(Matcher) Matcher) {
	for i, m := range s.Ms {
		s.Ms[i] = VisitMatcher(m, fn)
		s.Ms[i] = vc.Visit(m, fn)
	}
}

M ebnf_test.go => ebnf_test.go +58 -17
@@ 1,6 1,10 @@
package siberian
package siberian_test

import "testing"
import (
	"testing"

	"git.sr.ht/~mna/siberian"
)

func TestAlt(t *testing.T) {
	cases := []matcherTest{


@@ 16,11 20,11 @@ func TestAlt(t *testing.T) {
		{"def", 3},
		{"g", -1},
	}
	alt := Alt{
		Ms: []Matcher{
			Equal("a"),
			Equal("bc"),
			Equal("def"),
	alt := siberian.Alt{
		Ms: []siberian.Matcher{
			siberian.Equal("a"),
			siberian.Equal("bc"),
			siberian.Equal("def"),
		},
	}
	for _, c := range cases {


@@ 43,7 47,7 @@ func TestRepeat_ZeroOrOne(t *testing.T) {
		{"ababab", 2}, // one is max
		{"cabab", 0},
	}
	rep := ZeroOrOne(Equal("ab"))
	rep := siberian.ZeroOrOne(siberian.Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in), 0)


@@ 64,7 68,7 @@ func TestRepeat_OneOrMore(t *testing.T) {
		{"ababab", 6},
		{"cabab", -1},
	}
	rep := OneOrMore(Equal("ab"))
	rep := siberian.OneOrMore(siberian.Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in), 0)


@@ 85,7 89,7 @@ func TestRepeat_ZeroOrMore(t *testing.T) {
		{"ababab", 6},
		{"cabab", 0},
	}
	rep := ZeroOrMore(Equal("ab"))
	rep := siberian.ZeroOrMore(siberian.Equal("ab"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := rep.Match([]byte(c.in), 0)


@@ 107,8 111,8 @@ func TestRepeat_TwoOrThree(t *testing.T) {
		{"abababab", 6},
		{"cabab", -1},
	}
	rep := Repeat{
		M:   Equal("ab"),
	rep := siberian.Repeat{
		M:   siberian.Equal("ab"),
		Min: 2,
		Max: 3,
	}


@@ 134,11 138,11 @@ func TestSeq(t *testing.T) {
		{"abcdefg", 6},
		{"zabcdef", -1},
	}
	seq := Seq{
		Ms: []Matcher{
			Equal("a"),
			Equal("bc"),
			Equal("def"),
	seq := siberian.Seq{
		Ms: []siberian.Matcher{
			siberian.Equal("a"),
			siberian.Equal("bc"),
			siberian.Equal("def"),
		},
	}
	for _, c := range cases {


@@ 150,3 154,40 @@ func TestSeq(t *testing.T) {
		})
	}
}

func TestNot(t *testing.T) {
	cases := []matcherTest{
		{"", 0},
		{"a", -1},
		{"b", 0},
		{"ba", 0},
	}
	m := siberian.Not(siberian.Equal("a"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
		})
	}
}

func TestAnd(t *testing.T) {
	cases := []matcherTest{
		{"", -1},
		{"a", 0},
		{"b", -1},
		{"ba", -1},
		{"ab", 0},
	}
	m := siberian.And(siberian.Equal("a"))
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
		})
	}
}

M internal/jsonmatcher/jsonmatcher.go => internal/jsonmatcher/jsonmatcher.go +2 -2
@@ 4,12 4,12 @@ import "git.sr.ht/~mna/siberian"

// init assigns the list of Matchers to value.Ms.
// NOTE(review): presumably done in init rather than at value's declaration
// because some of the listed Matchers refer back to value — confirm.
func init() {
	value.Ms = []siberian.Matcher{
		boolVal,
		nullVal,
		objectVal,
		arrayVal,
		numberVal,
		stringVal,
		boolVal,
		nullVal,
	}
}


M matcher.go => matcher.go +1 -1
@@ 21,7 21,7 @@ type Matcher interface {
// MatcherVisitor defines the method to visit the Matchers and possibly
// replace or wrap them in a different Matcher. A Matcher that holds other
// Matchers implements it by visiting each of them and storing the Matcher
// the visit returns in place of the original.
type MatcherVisitor interface {
	Visit(fn func(Matcher) Matcher)
	Visit(vc *VisitContext, fn func(Matcher) Matcher)
}

// MatcherFunc is a function type that implements Matcher by calling

M matcher_test.go => matcher_test.go +12 -10
@@ 1,9 1,11 @@
package siberian
package siberian_test

import (
	"regexp"
	"testing"
	"unicode"

	"git.sr.ht/~mna/siberian"
)

type matcherTest struct {


@@ 21,11 23,11 @@ func TestASCII(t *testing.T) {
		{"az", 1},
		{"za", -1},
	}
	var at ASCIITable
	var at siberian.ASCIITable
	at.Set([]byte("abc")...)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := ASCII(&at)
			m := siberian.ASCII(&at)
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)


@@ 45,7 47,7 @@ func TestUnicode(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Unicode(unicode.Ll) // letter, lowercase
			m := siberian.Unicode(unicode.Ll) // letter, lowercase
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)


@@ 66,7 68,7 @@ func TestRegexp(t *testing.T) {
	re := regexp.MustCompile(`^[a-c]+`)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Regexp(re)
			m := siberian.Regexp(re)
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)


@@ 88,7 90,7 @@ func TestEqual(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := Equal("abc")
			m := siberian.Equal("abc")
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)


@@ 111,7 113,7 @@ func TestEqualFold(t *testing.T) {
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			m := EqualFold("abc")
			m := siberian.EqualFold("abc")
			n := m.Match([]byte(c.in), 0)
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)


@@ 130,7 132,7 @@ func TestBytes(t *testing.T) {
		{"\xff\xfe", 2},
		{"👪", 2},
	}
	m := Bytes(2)
	m := siberian.Bytes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in), 0)


@@ 153,7 155,7 @@ func TestRunes(t *testing.T) {
		{"👪", -1},
		{"👪🤡", 8},
	}
	m := Runes(2)
	m := siberian.Runes(2)
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in), 0)


@@ 170,7 172,7 @@ func TestEOF(t *testing.T) {
		{"a", -1},
		{"ab", -1},
	}
	var m Matcher = EOF
	var m siberian.Matcher = siberian.EOF
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			n := m.Match([]byte(c.in), 0)

M parse.go => parse.go +41 -4
@@ 1,5 1,7 @@
package siberian

import "reflect"

// Matches returns true if m matches for input b. Note that if m allows a
// zero match, Matches still returns true (a 0-match is still a match).
// To check if m matches the full input b, make sure the matcher ends with


@@ 9,11 11,46 @@ func Matches(m Matcher, b []byte) bool {
	return n >= 0
}

// VisitContext maintains context during a visit of Matchers. Because
// Matchers can refer to themselves and have cycles, it keeps track
// of Matchers that have already been visited. The zero value is ready
// to use.
type VisitContext struct {
	// seen maps the original Matcher to the Matcher that should replace it.
	// It is allocated lazily by Visit.
	seen map[Matcher]Matcher
}

// Visit visits m and applies fn to it, using this VisitContext, and returns
// the Matcher that fn produced for m.
//
// If m was already visited through this context, the replacement recorded
// at that time is returned and fn is not called again; this is what keeps a
// cyclic Matcher graph from being walked forever. Otherwise fn is applied
// to m, the result is recorded and, if m implements MatcherVisitor, its
// Visit method is called with this context so nested Matchers are visited
// too.
//
// Only Matchers whose dynamic type is comparable can be recorded. Others
// are still passed to fn and visited, but without cycle protection.
func (vc *VisitContext) Visit(m Matcher, fn func(Matcher) Matcher) Matcher {
	// Only comparable values are valid map keys. Asking the type, rather than
	// checking just the top-level kind, also rules out structs and arrays
	// that contain a func, map or slice, which would panic when hashed.
	// A nil Matcher has no dynamic type and is a valid key.
	typ := reflect.TypeOf(m)
	keyable := typ == nil || typ.Comparable()

	if keyable {
		// Reading from a nil map is safe, so no allocation is needed yet.
		if mm, ok := vc.seen[m]; ok {
			return mm
		}
	}

	mm := fn(m)
	if keyable {
		if vc.seen == nil {
			vc.seen = make(map[Matcher]Matcher)
		}
		// Record the replacement before descending into m, so that a cycle
		// leading back to m finds it instead of recursing again.
		vc.seen[m] = mm
	}
	if v, ok := m.(MatcherVisitor); ok {
		v.Visit(vc, fn)
	}
	return mm
}

// VisitMatcher visits m and any matchers it wraps, calling fn for each,
// possibly wrapping or replacing the matchers with its returned value.
// It returns the Matcher that results from visiting m.
func VisitMatcher(m Matcher, fn func(Matcher) Matcher) Matcher {
	if v, ok := m.(MatcherVisitor); ok {
		v.Visit(fn)
	}
	return fn(m)
	vc := &VisitContext{seen: make(map[Matcher]Matcher)}
	return vc.Visit(m, fn)
}

M parse_test.go => parse_test.go +41 -19
@@ 1,23 1,28 @@
package siberian
package siberian_test

import "testing"
import (
	"testing"

	"git.sr.ht/~mna/siberian"
	"git.sr.ht/~mna/siberian/internal/jsonmatcher"
)

func TestMatches_AB(t *testing.T) {
	// Start = { As | B } .
	// As = A+ .
	// A = "a" .
	// B = "b" .
	g := &Repeat{
	g := &siberian.Repeat{
		Min: 0,
		Max: -1,
		M: &Alt{
			Ms: []Matcher{
				&Repeat{
		M: &siberian.Alt{
			Ms: []siberian.Matcher{
				&siberian.Repeat{
					Min: 1,
					Max: -1,
					M:   Equal("a"),
					M:   siberian.Equal("a"),
				},
				Equal("b"),
				siberian.Equal("b"),
			},
		},
	}


@@ 38,7 43,7 @@ func TestMatches_AB(t *testing.T) {
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
			matches := Matches(g, []byte(c.in))
			matches := siberian.Matches(g, []byte(c.in))
			if want := n >= 0; want != matches {
				t.Errorf("want match? %t, got %t", want, matches)
			}


@@ 51,23 56,23 @@ func TestMatches_AB_EOF(t *testing.T) {
	// As = A+ .
	// A = "a" .
	// B = "b" .
	g := &Seq{
		Ms: []Matcher{
			&Repeat{
	g := &siberian.Seq{
		Ms: []siberian.Matcher{
			&siberian.Repeat{
				Min: 0,
				Max: -1,
				M: &Alt{
					Ms: []Matcher{
						&Repeat{
				M: &siberian.Alt{
					Ms: []siberian.Matcher{
						&siberian.Repeat{
							Min: 1,
							Max: -1,
							M:   Equal("a"),
							M:   siberian.Equal("a"),
						},
						Equal("b"),
						siberian.Equal("b"),
					},
				},
			},
			EOF,
			siberian.EOF,
		},
	}
	cases := []matcherTest{


@@ 87,10 92,27 @@ func TestMatches_AB_EOF(t *testing.T) {
			if n != c.len {
				t.Errorf("want match of length %d, got %d", c.len, n)
			}
			matches := Matches(g, []byte(c.in))
			matches := siberian.Matches(g, []byte(c.in))
			if want := n >= 0; want != matches {
				t.Errorf("want match? %t, got %t", want, matches)
			}
		})
	}
}

// TestVisitMatcher passes jsonmatcher.Val through VisitMatcher with a
// function that wraps each visited Matcher in a MatcherFunc counting
// successful matches. It then matches "null" and expects the counter to
// read exactly two.
func TestVisitMatcher(t *testing.T) {
	const want = 2

	var got int
	counting := func(inner siberian.Matcher) siberian.Matcher {
		return siberian.MatcherFunc(func(b []byte, p int) int {
			n := inner.Match(b, p)
			if n >= 0 {
				got++
			}
			return n
		})
	}

	root := siberian.VisitMatcher(jsonmatcher.Val, counting)
	siberian.Matches(root, []byte("null"))
	if got != want {
		t.Fatalf("want %d matches, got %d", want, got)
	}
}