~mna/zerojson

2dfbb188c00ea07b4fe5364af614861dd82c7463 — Martin Angers 1 year, 18 days ago e61da2d
test nested array/object, streamline errors
3 files changed, 114 insertions(+), 35 deletions(-)

M zerojson.go
M zerojson_bench_test.go
M zerojson_test.go
M zerojson.go => zerojson.go +30 -22
@@ 1,6 1,7 @@
package zerojson

import (
	"fmt"
	"io"
)



@@ 12,14 13,9 @@ func (e errorString) Error() string {
}

const (
	// ErrIncompleteToken is the error representing an incomplete true, false or
	// null token.
	ErrIncompleteToken errorString = "incomplete token"

	// ErrUnclosedString is the error representing an unclosed string value, i.e.
	// when the opening quote was encountered, but EOF was reached before the closing
	// quote, or an unescaped code point that must be escaped is found.
	ErrUnclosedString errorString = "unclosed string"
	// ErrIncompleteValue is the error representing an incomplete true, false or
	// null token, an unclosed string, object or array, or an incomplete number.
	ErrIncompleteValue errorString = "incomplete value"

	// ErrInvalidCodePoint is the error representing an invalid code point. This is
	// any code point disallowed in the JSON grammar (e.g. "!" outside a string value),


@@ 31,9 27,6 @@ const (
	// ErrIncompleteHexEsc is the error representing an invalid \uXXXX hexadecimal
	// escape sequence in a string value.
	ErrIncompleteHexEsc errorString = "incomplete hexadecimal escape sequence"

	// ErrIncompleteNumber is the error representing an incomplete number value.
	ErrIncompleteNumber errorString = "incomplete number"
)

// JSON supports 7 different values:


@@ 47,10 40,6 @@ const (
// - Null, starting with 'n'
//
// Of those, Object and Array can nest and as such require a stack.
// However, since String and Number can be arbitrarily long, when tokenizing
// with a limited buffer, those values may be broken down in multiple chunks
// too.
//
// Also, when processing an Object, the parser must keep track of whether
// it is processing a key or a value (both may be strings).



@@ 163,6 152,7 @@ type parser struct {
	allow byte // '\x00'=any value, '<'=key, '>'=value, ':'=colon, ','=comma or ']' or '}'
	pos   int
	stack stack
	debug bool
}

func (p *parser) parse() error {


@@ 183,6 173,11 @@ loop:
		typ = p.cur
		p.advance()

		// Debug trace, emitted only when p.debug is set: prints start, the
		// value on top of the stack, what the parser currently allows and the
		// character that was just read (typ holds p.cur from before advance).
		if p.debug {
			peek := p.stack.peek()
			// The arguments must follow the order of the labels in the format
			// string: "allow" is p.allow and "char" is typ.
			fmt.Printf("%d: stack=%c ; allow=%c ; char=%c\n", start, peek, p.allow, typ)
		}

		switch p.allow {
		case ':':
			if typ == ':' {


@@ 239,6 234,19 @@ loop:
		default:
			if p.allow == '>' {
				p.allow = ','

				// special case for '>', if inside an array, can be a ']'
				if inside := p.stack.peek(); inside == '[' && typ == ']' {
					p.stack.pop()
					peek := p.stack.peek()
					switch peek {
					case '{', '[':
						p.allow = ','
					default:
						p.allow = 0
					}
					break
				}
			}

			switch typ {


@@ 284,7 292,7 @@ func (p *parser) scanNumber(first byte) error {
			first = p.cur
			p.advance()
		} else {
			return ErrIncompleteNumber
			return ErrIncompleteValue
		}
	}



@@ 304,7 312,7 @@ func (p *parser) scanNumber(first byte) error {
			p.advance()
		}
		if !digitSeen {
			return ErrIncompleteNumber
			return ErrIncompleteValue
		}
	}



@@ 321,7 329,7 @@ func (p *parser) scanNumber(first byte) error {
			p.advance()
		}
		if !digitSeen {
			return ErrIncompleteNumber
			return ErrIncompleteValue
		}
	}



@@ 337,11 345,11 @@ func (p *parser) scanString() error {
				return err
			}
		case p.eof():
			return ErrUnclosedString
			return ErrIncompleteValue
		case p.cur <= 0x1F:
			// do not advance, the control character will be considered outside
			// the string and will generate a distinct ErrInvalidCodePoint.
			return ErrUnclosedString
			return ErrIncompleteValue
		default:
			p.advance()
		}


@@ 365,7 373,7 @@ func (p *parser) scanEscape() error {
		// invalid escape, move back to previous byte and report unclosed
		// string, then treat the single backslash as an invalid code point.
		p.back()
		return ErrUnclosedString
		return ErrIncompleteValue
	}
}



@@ 386,7 394,7 @@ func (p *parser) scanFourHex() error {
func (p *parser) scanToken(trail string) error {
	for _, b := range []byte(trail) {
		if p.cur != b {
			return ErrIncompleteToken
			return ErrIncompleteValue
		}
		p.advance()
	}

M zerojson_bench_test.go => zerojson_bench_test.go +5 -1
@@ 27,7 27,11 @@ func BenchmarkNumber(b *testing.B) {
}

// BenchmarkSmallObject passes small JSON object literals to benchmarkInput,
// which drives the actual benchmark loop (defined further down in this file).
func BenchmarkSmallObject(b *testing.B) {
	benchmarkInput(b, `{"key": true}`)
	benchmarkInput(b, `{"active": true, "name": "Foo Bar", "age": 42}`)
}

// BenchmarkSmallArray passes a short JSON array literal holding a string, a
// negative decimal number, false and null to benchmarkInput.
func BenchmarkSmallArray(b *testing.B) {
	const input = `["a string", -10.234, false, null]`
	benchmarkInput(b, input)
}

func benchmarkInput(b *testing.B, input string) {

M zerojson_test.go => zerojson_test.go +79 -12
@@ 20,9 20,9 @@ func TestParser(t *testing.T) {
		{"", "", nil},
		{" \t\n ", "", nil},

		{" nul ", "1: n: nul", ErrIncompleteToken},
		{"t", "0: t: t", ErrIncompleteToken},
		{"\t\n fa", "3: f: fa", ErrIncompleteToken},
		{" nul ", "1: n: nul", ErrIncompleteValue},
		{"t", "0: t: t", ErrIncompleteValue},
		{"\t\n fa", "3: f: fa", ErrIncompleteValue},

		{"null", "0: n: null", nil},
		{"true", "0: t: true", nil},


@@ 39,10 39,11 @@ func TestParser(t *testing.T) {
		{`"a"`, `0: ": "a"`, nil},
		{" \t\n" + `"" `, `3: ": ""`, nil},
		{`"a\bc\"\t\n\\\u0123"`, `0: ": "a\bc\"\t\n\\\u0123"`, nil},
		{`"\"`, "0: \": \"\\\"", ErrUnclosedString},
		{`"\a"`, "0: \": \"", ErrUnclosedString},
		{`"`, "0: \": \"", ErrIncompleteValue},
		{`"\"`, "0: \": \"\\\"", ErrIncompleteValue},
		{`"\a"`, "0: \": \"", ErrIncompleteValue},
		{`"\uabc"`, "0: \": \"\\uabc", ErrIncompleteHexEsc},
		{"\"\x00\"", "0: \": \"", ErrUnclosedString},
		{"\"\x00\"", "0: \": \"", ErrIncompleteValue},

		{"0", "0: 1: 0", nil},
		{"1", "0: 1: 1", nil},


@@ 54,7 55,7 @@ func TestParser(t *testing.T) {
		{"7", "0: 1: 7", nil},
		{"8", "0: 1: 8", nil},
		{"9", "0: 1: 9", nil},
		{"00", "0: 1: 0\n1: 1: 0\n", nil}, // TODO: would it be better to error?
		{"00", "0: 1: 0\n1: 1: 0\n", nil},
		{"-0", "0: 1: -0", nil},
		{"-1", "0: 1: -1", nil},
		{"-2", "0: 1: -2", nil},


@@ 65,12 66,13 @@ func TestParser(t *testing.T) {
		{"-7", "0: 1: -7", nil},
		{"-8", "0: 1: -8", nil},
		{"-9", "0: 1: -9", nil},
		{"-01", "0: 1: -0\n2: 1: 1", nil}, // TODO: same
		{"-01", "0: 1: -0\n2: 1: 1", nil},
		{"1234567890", "0: 1: 1234567890", nil},
		{"-1234567890", "0: 1: -1234567890", nil},
		{"1.0123456789", "0: 1: 1.0123456789", nil},
		{"1.", "0: 1: 1.", ErrIncompleteNumber},
		{"-", "0: 1: -", ErrIncompleteNumber},
		{"1.", "0: 1: 1.", ErrIncompleteValue},
		{"-", "0: 1: -", ErrIncompleteValue},
		{"-1e", "0: 1: -1e", ErrIncompleteValue},
		{"0.123456789", "0: 1: 0.123456789", nil},
		{"-0.123456789", "0: 1: -0.123456789", nil},



@@ 98,6 100,71 @@ func TestParser(t *testing.T) {

		{"{}", "0: {: {\n1: }: }", nil},
		{`{"a" : 1}`, "0: {: {\n1: \": \"a\"\n7: 1: 1\n8: }: }", nil},
		{` {"a" : 1,"b":true , "c" : "d" } `, `
1: {: {
2: ": "a"
8: 1: 1
10: ": "b"
14: t: true
21: ": "c"
27: ": "d"
31: }: }
`, nil},
		{`{"a": {"b": {"c": "d", "e": 1}, "f": null}}`, `
0: {: {
1: ": "a"
6: {: {
7: ": "b"
12: {: {
13: ": "c"
18: ": "d"
23: ": "e"
28: 1: 1
29: }: }
32: ": "f"
37: n: null
41: }: }
42: }: }
`, nil},

		{"[]", "0: [: [\n1: ]: ]", nil},
		{"[true]", "0: [: [\n1: t: true\n5: ]: ]", nil},
		{"[true,1 , false ]", `
0: [: [
1: t: true
6: 1: 1
10: f: false
16: ]: ]
`, nil},

		{`[1.02 , "a\b\"" , [ null, [] ], false]`, `
0: [: [
1: 1: 1.02
8: ": "a\b\""
18: [: [
20: n: null
26: [: [
27: ]: ]
29: ]: ]
32: f: false
37: ]: ]
`, nil},

		{`[{}]`, `
0: [: [
1: {: {
2: }: }
3: ]: ]
`, nil},
		{`{"": []}`, `
0: {: {
1: ": ""
5: [: [
6: ]: ]
7: }: }
`, nil},

		{"{", "0: {: {", nil},
	}

	for _, c := range cases {


@@ 115,14 182,14 @@ func TestParser(t *testing.T) {
				},
			}
			err := p.parse()
			require.Equal(t, strings.TrimSpace(c.out), strings.TrimSpace(buf.String()))

			if c.err != nil {
				require.Error(t, err)
				require.Equal(t, c.err, err)
			} else {
				require.NoError(t, err)
			}

			require.Equal(t, strings.TrimSpace(c.out), strings.TrimSpace(buf.String()))
		})
	}
}