~mna/siberian

c57701ee68e661e9195f11d396467edacf869650 — Martin Angers 4 years ago 4d0db11
implement json grammar, test some parsing values
M .golangci.toml => .golangci.toml +4 -1
@@ 28,6 28,9 @@
  ]


[issues.exclude-rules]
[[issues.exclude-rules]]
  path = "_test.go"
  linters = ["gochecknoglobals"]
[[issues.exclude-rules]]
  path = "internal/jsonmatcher/"
  linters = ["gochecknoglobals", "gochecknoinits"]

M ascii_table.go => ascii_table.go +36 -0
@@ 31,6 31,42 @@ func (t *ASCIITable) Unset(chars ...byte) {
	}
}

// SetRange sets all chars in [from, to] inclusively as allowed. It panics if
// from is greater than to or if to is outside the ASCII range. When from
// equals to, only that single char is set.
func (t *ASCIITable) SetRange(from, to byte) {
	// true selects the "set" behaviour of the shared range helper.
	t.setUnsetRange(from, to, true)
}

// UnsetRange sets all chars in [from, to] inclusively as disallowed. It panics if
// from is greater than to or if to is outside the ASCII range. When from
// equals to, only that single char is unset.
func (t *ASCIITable) UnsetRange(from, to byte) {
	// false selects the "unset" behaviour of the shared range helper.
	t.setUnsetRange(from, to, false)
}

// Invert flips the table: every char that was allowed becomes disallowed,
// and every char that was disallowed becomes allowed. It does so by taking
// the bitwise complement of both elements of the table.
func (t *ASCIITable) Invert() {
	t[0], t[1] = ^t[0], ^t[1]
}

// setUnsetRange is the shared implementation of SetRange and UnsetRange. It
// validates the bounds, expands [from, to] into the explicit list of bytes
// and hands that list to Set (when set is true) or Unset (when it is false).
// It panics if from is greater than to, or if to is not an ASCII byte.
func (t *ASCIITable) setUnsetRange(from, to byte, set bool) {
	switch {
	case from > to:
		panic(fmt.Sprintf("from byte %x is greater than to byte %x", from, to))
	case to > unicode.MaxASCII:
		panic(fmt.Sprintf("%x is outside the ASCII range", to))
	}

	// iterate using ints so the loop counter is independent of byte width
	chars := make([]byte, 0, int(to-from)+1)
	for c := int(from); c <= int(to); c++ {
		chars = append(chars, byte(c))
	}

	if !set {
		t.Unset(chars...)
		return
	}
	t.Set(chars...)
}

// Are returns -1, true if all bytes in b are allowed by this ascii
// table, or the index of the first disallowed byte and false.
func (t *ASCIITable) Are(b []byte) (n int, ok bool) {

M ascii_table_test.go => ascii_table_test.go +39 -0
@@ 44,6 44,45 @@ func TestASCIITable(t *testing.T) {
	}
}

// TestASCIITable_Range exercises SetRange, UnsetRange and Invert. For each
// case an empty table optionally gets the [from, to] range set, then the
// [ufrom, uto] range unset, then is inverted; the resulting String()
// representation is compared with want. Empty from/ufrom strings skip the
// corresponding step.
func TestASCIITable_Range(t *testing.T) {
	cases := []struct {
		from, to   string
		ufrom, uto string
		invert     bool
		want       string
	}{
		{"a", "a", "", "", false, "[a]"},
		{"a", "d", "", "", false, "[a-d]"},
		{"A", "z", "", "", false, "[A-z]"},
		{"0", "9", "", "", false, "[0-9]"},
		{"0", "9", "2", "8", false, "[0-1,9]"},
		{"A", "z", "A", "a", false, "[b-z]"},
		{"", "", "", "", true, "[x00-x7f]"},
		{"\x20", "\x7f", "", "", true, "[x00-x1f]"},
		{"\x00", "@", "", "", true, "[A-x7f]"},
	}
	for _, c := range cases {
		t.Run(fmt.Sprintf("from: %q, to: %q", c.from, c.to), func(t *testing.T) {
			var at ASCIITable

			if len(c.from) > 0 {
				at.SetRange(c.from[0], c.to[0])
			}
			if len(c.ufrom) > 0 {
				at.UnsetRange(c.ufrom[0], c.uto[0])
			}
			if c.invert {
				at.Invert()
			}

			t.Logf("\n%#v\n", at)
			if got := at.String(); got != c.want {
				// each label and each value goes on its own line
				t.Fatalf("want\n%q\ngot\n%q\n", c.want, got)
			}
		})
	}
}

func TestASCIITable_IsNonASCII(t *testing.T) {
	var at ASCIITable
	at.Set('a', 'b')

A ast.go => ast.go +1 -0
@@ 0,0 1,1 @@
package siberian

M ebnf.go => ebnf.go +12 -0
@@ 19,6 19,18 @@ func (a *Alt) Match(b []byte, p int) int {
	return -1
}

// Not negates m: the returned Matcher reports a zero-length match (0) at
// the positions where m fails to match, and a no-match (-1) at the
// positions where m matches.
func Not(m Matcher) MatcherFunc {
	return func(b []byte, p int) int {
		if m.Match(b, p) < 0 {
			// m failed, which is a success for Not
			return 0
		}
		return -1
	}
}

// ZeroOrOne returns a Repeat matcher that matches m zero or one
// time.
func ZeroOrOne(m Matcher) Matcher {

A internal/jsonmatcher/jsonmatcher.go => internal/jsonmatcher/jsonmatcher.go +158 -0
@@ 0,0 1,158 @@
package jsonmatcher

import "git.sr.ht/~mna/siberian"

func init() {
	value.Ms = []siberian.Matcher{
		objectVal,
		arrayVal,
		numberVal,
		stringVal,
		boolVal,
		nullVal,
	}
}

// Exported matchers of the json grammar.
var (
	// Doc matches a complete json document, that is a single value that
	// extends until EOF.
	Doc siberian.Matcher = jsonDoc

	// Val matches a single json value and may leave part of the input
	// unconsumed.
	Val siberian.Matcher = value
)

var (
	// jsonDoc is the grammar of a whole document: a single value, optionally
	// surrounded by whitespace, followed by the end of the input.
	jsonDoc = &siberian.Seq{Ms: []siberian.Matcher{
		_ws, value, _ws,
		siberian.EOF,
	}}

	// value is the alternation of every kind of json value. Its Ms field
	// (objectVal, arrayVal, numberVal, stringVal, boolVal, nullVal) is
	// assigned in the init func to prevent an initialization cycle.
	value = &siberian.Alt{}

	// objectVal is '{', an optional list of key-value pairs, then '}'.
	objectVal = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.Equal("{"), _ws,
		siberian.ZeroOrOne(keyValList),
		_ws, siberian.Equal("}"),
	}}

	// keyValList is one key-value pair followed by any number of
	// comma-prefixed key-value pairs.
	keyValList = &siberian.Seq{Ms: []siberian.Matcher{
		keyVal,
		siberian.ZeroOrMore(&siberian.Seq{Ms: []siberian.Matcher{
			_ws, siberian.Equal(","),
			keyVal,
		}}),
	}}

	// keyVal is a string key, ':', and a value, each optionally preceded by
	// whitespace.
	keyVal = &siberian.Seq{Ms: []siberian.Matcher{
		_ws, stringVal,
		_ws, siberian.Equal(":"),
		_ws, value,
	}}

	// arrayVal is '[', an optional list of values, then ']'.
	arrayVal = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.Equal("["), _ws,
		siberian.ZeroOrOne(valueList),
		_ws, siberian.Equal("]"),
	}}

	// valueList is one value followed by any number of comma-prefixed
	// values.
	valueList = &siberian.Seq{Ms: []siberian.Matcher{
		value,
		siberian.ZeroOrMore(&siberian.Seq{Ms: []siberian.Matcher{
			_ws, siberian.Equal(","),
			_ws, value,
		}}),
	}}

	// numberVal is an optional minus sign, an integer part, an optional
	// fraction and an optional exponent.
	numberVal = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.ZeroOrOne(siberian.Equal("-")),
		integerVal,
		siberian.ZeroOrOne(&siberian.Seq{Ms: []siberian.Matcher{
			siberian.Equal("."),
			siberian.OneOrMore(decimalDigit),
		}}),
		siberian.ZeroOrOne(exponent),
	}}

	// exponent is 'e' or 'E', an optional sign and at least one digit.
	exponent = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.EqualFold("e"),
		siberian.ZeroOrOne(siberian.ASCII(buildASCIITable("-+"))),
		siberian.OneOrMore(decimalDigit),
	}}

	// integerVal is either a lone "0" or a non-zero digit followed by any
	// number of digits (no leading zeros).
	integerVal = &siberian.Alt{Ms: []siberian.Matcher{
		siberian.Equal("0"),
		&siberian.Seq{Ms: []siberian.Matcher{
			nonZeroDecimalDigit,
			siberian.ZeroOrMore(decimalDigit),
		}},
	}}

	// stringVal is a double-quoted sequence of non-escaped runes and escape
	// sequences.
	stringVal = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.Equal("\""),
		siberian.ZeroOrMore(
			&siberian.Alt{Ms: []siberian.Matcher{
				// either a non-escaped rune
				&siberian.Seq{Ms: []siberian.Matcher{
					siberian.Not(mustEscapeChar),
					siberian.Runes(1),
				}},
				// ... or an escaped sequence
				escapeSeq,
			}},
		),
		siberian.Equal("\""),
	}}

	// escapeSeq is a backslash followed by either a single escape char or a
	// unicode escape.
	escapeSeq = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.Equal("\\"),
		&siberian.Alt{Ms: []siberian.Matcher{
			singleEscapeChar,
			unicodeEscapeSeq,
		}},
	}}

	// mustEscapeChar is the set of chars that cannot appear as-is inside a
	// string: control chars 0x00-0x1f, the double quote and the backslash.
	mustEscapeChar = siberian.ASCII(buildASCIITable("\x00-\x1f", "\"\\"))

	// singleEscapeChar is the set of chars that may follow a backslash in a
	// two-char escape sequence. The double quote must be part of it so that
	// `\"` is accepted inside a string.
	singleEscapeChar = siberian.ASCII(buildASCIITable("\"\\/bfnrt"))

	// unicodeEscapeSeq is 'u' followed by exactly 4 hex digits.
	unicodeEscapeSeq = &siberian.Seq{Ms: []siberian.Matcher{
		siberian.Equal("u"),
		&siberian.Repeat{
			Min: 4,
			Max: 4,
			M:   hexDigit,
		},
	}}

	// boolVal is either of the boolean literals.
	boolVal = &siberian.Alt{Ms: []siberian.Matcher{trueVal, falseVal}}

	// literals, character classes and the optional whitespace matcher
	trueVal             = siberian.Equal("true")
	falseVal            = siberian.Equal("false")
	nullVal             = siberian.Equal("null")
	decimalDigit        = siberian.ASCII(buildASCIITable("0-9"))
	hexDigit            = siberian.ASCII(buildASCIITable("0-9", "a-f", "A-F"))
	nonZeroDecimalDigit = siberian.ASCII(buildASCIITable("1-9"))
	whitespace          = siberian.ASCII(buildASCIITable(" \t\r\n"))
	_ws                 = siberian.ZeroOrMore(whitespace)
)

// buildASCIITable returns a new table with the chars described by pats set.
// A pattern of exactly 3 bytes with '-' in the middle (e.g. "a-z") is a
// range and sets every char from its first to its last byte, inclusively.
// For any other pattern, each of its bytes is set individually.
func buildASCIITable(pats ...string) *siberian.ASCIITable {
	tbl := new(siberian.ASCIITable)
	for _, p := range pats {
		isRange := len(p) == 3 && p[1] == '-'
		if !isRange {
			tbl.Set([]byte(p)...)
			continue
		}
		// range pattern, from-to
		tbl.SetRange(p[0], p[2])
	}
	return tbl
}

A internal/jsonmatcher/jsonmatcher_test.go => internal/jsonmatcher/jsonmatcher_test.go +38 -0
@@ 0,0 1,38 @@
package jsonmatcher

import (
	"testing"

	"git.sr.ht/~mna/siberian"
)

// TestMatches checks that Doc matches each of the listed json documents.
// Every input runs as its own subtest, named after the input.
func TestMatches(t *testing.T) {
	inputs := []string{
		"true",
		"false",
		"null",
		"0",
		"1",
		"-1",
		"-1.0",
		"-1.0123456789",
		"-1.2e+345",
		"0.2e-01",
		`""`,
		`"abc"`,
		`"abc\"d"`,
		`"\\\/\b\f\n\r\t\""`,
		`[]`,
		`[true]`,
		`[true,false,1,2]`,
		`{}`,
		`{"x": 1, "y": 2}`,
	}
	for _, in := range inputs {
		t.Run(in, func(t *testing.T) {
			matched := siberian.Matches(Doc, []byte(in))
			if !matched {
				t.Fatal("failed to match")
			}
		})
	}
}