2dfbb188c00ea07b4fe5364af614861dd82c7463 — Martin Angers 5 months ago e61da2d
test nested array/object, streamline errors
3 files changed, 114 insertions(+), 35 deletions(-)

M zerojson.go
M zerojson_bench_test.go
M zerojson_test.go
M zerojson.go => zerojson.go +30 -22
@@ 1,6 1,7 @@ package zerojson
  
  import (
+ 	"fmt"
  	"io"
  )
  


@@ 12,14 13,9 @@ }
  
  const (
- 	// ErrIncompleteToken is the error representing an incomplete true, false or
- 	// null token.
- 	ErrIncompleteToken errorString = "incomplete token"
- 
- 	// ErrUnclosedString is the error representing an unclosed string value, i.e.
- 	// when the opening quote was encountered, but EOF was reached before the closing
- 	// quote, or an unescaped code point that must be escaped is found.
- 	ErrUnclosedString errorString = "unclosed string"
+ 	// ErrIncompleteValue is the error representing an incomplete true, false or
+ 	// null token, an unclosed string, object or array, or an incomplete number.
+ 	ErrIncompleteValue errorString = "incomplete value"
  
  	// ErrInvalidCodePoint is the error representing an invalid code point. This is
  	// any code point disallowed in the JSON grammar (e.g. "!" outside a string value),


@@ 31,9 27,6 @@ // ErrIncompleteHexEsc is the error representing an invalid \uXXXX hexadecimal
  	// escape sequence in a string value.
  	ErrIncompleteHexEsc errorString = "incomplete hexadecimal escape sequence"
- 
- 	// ErrIncompleteNumber is the error representing an invalid number value.
- 	ErrIncompleteNumber errorString = "incomplete number"
  )
  
  // JSON supports 7 different values:


@@ 47,10 40,6 @@ // - Null, starting with 'n'
  //
  // Of those, Object and Array can nest and as such require a stack.
- // However, since String and Number can be arbitrarily long, when tokenizing
- // with a limited buffer, those values may be broken down in multiple chunks
- // too.
- //
  // Also, when processing an Object, the parser must keep track of whether
  // it is processing a key or a value (both may be strings).
  


@@ 163,6 152,7 @@ allow byte // '\x00'=any value, '<'=key, '>'=value, ':'=colon, ','=comma or ']' or '}'
  	pos   int
  	stack stack
+ 	debug bool
  }
  
  func (p *parser) parse() error {


@@ 183,6 173,11 @@ typ = p.cur
  		p.advance()
  
+ 		if p.debug { // trace parser state before dispatching on p.allow
+ 			peek := p.stack.peek()
+ 			fmt.Printf("%d: stack=%c ; allow=%c ; char=%c\n", start, peek, p.allow, typ)
+ 		}
+ 
  		switch p.allow {
  		case ':':
  			if typ == ':' {


@@ 239,6 234,19 @@ default:
  			if p.allow == '>' {
  				p.allow = ','
+ 
+ 				// special case for '>', if inside an array, can be a ']'
+ 				if inside := p.stack.peek(); inside == '[' && typ == ']' {
+ 					p.stack.pop()
+ 					peek := p.stack.peek()
+ 					switch peek {
+ 					case '{', '[':
+ 						p.allow = ','
+ 					default:
+ 						p.allow = 0
+ 					}
+ 					break
+ 				}
  			}
  
  			switch typ {


@@ 284,7 292,7 @@ first = p.cur
  			p.advance()
  		} else {
- 			return ErrIncompleteNumber
+ 			return ErrIncompleteValue
  		}
  	}
  


@@ 304,7 312,7 @@ p.advance()
  		}
  		if !digitSeen {
- 			return ErrIncompleteNumber
+ 			return ErrIncompleteValue
  		}
  	}
  


@@ 321,7 329,7 @@ p.advance()
  		}
  		if !digitSeen {
- 			return ErrIncompleteNumber
+ 			return ErrIncompleteValue
  		}
  	}
  


@@ 337,11 345,11 @@ return err
  			}
  		case p.eof():
- 			return ErrUnclosedString
+ 			return ErrIncompleteValue
  		case p.cur <= 0x1F:
  			// do not advance, the control character will be considered outside
  			// the string and will generate a distinct ErrInvalidCodePoint.
- 			return ErrUnclosedString
+ 			return ErrIncompleteValue
  		default:
  			p.advance()
  		}


@@ 365,7 373,7 @@ // invalid escape, move back to previous byte and report unclosed
  		// string, then treat the single backslash as an invalid code point.
  		p.back()
- 		return ErrUnclosedString
+ 		return ErrIncompleteValue
  	}
  }
  


@@ 386,7 394,7 @@ func (p *parser) scanToken(trail string) error {
  	for _, b := range []byte(trail) {
  		if p.cur != b {
- 			return ErrIncompleteToken
+ 			return ErrIncompleteValue
  		}
  		p.advance()
  	}

M zerojson_bench_test.go => zerojson_bench_test.go +5 -1
@@ 27,7 27,11 @@ }
  
  func BenchmarkSmallObject(b *testing.B) {
- 	benchmarkInput(b, `{"key": true}`)
+ 	benchmarkInput(b, `{"active": true, "name": "Foo Bar", "age": 42}`)
+ }
+ 
+ func BenchmarkSmallArray(b *testing.B) {
+ 	benchmarkInput(b, `["a string", -10.234, false, null]`)
  }
  
  func benchmarkInput(b *testing.B, input string) {

M zerojson_test.go => zerojson_test.go +79 -12
@@ 20,9 20,9 @@ {"", "", nil},
  		{" \t\n ", "", nil},
  
- 		{" nul ", "1: n: nul", ErrIncompleteToken},
- 		{"t", "0: t: t", ErrIncompleteToken},
- 		{"\t\n fa", "3: f: fa", ErrIncompleteToken},
+ 		{" nul ", "1: n: nul", ErrIncompleteValue},
+ 		{"t", "0: t: t", ErrIncompleteValue},
+ 		{"\t\n fa", "3: f: fa", ErrIncompleteValue},
  
  		{"null", "0: n: null", nil},
  		{"true", "0: t: true", nil},


@@ 39,10 39,11 @@ {`"a"`, `0: ": "a"`, nil},
  		{" \t\n" + `"" `, `3: ": ""`, nil},
  		{`"a\bc\"\t\n\\\u0123"`, `0: ": "a\bc\"\t\n\\\u0123"`, nil},
- 		{`"\"`, "0: \": \"\\\"", ErrUnclosedString},
- 		{`"\a"`, "0: \": \"", ErrUnclosedString},
+ 		{`"`, "0: \": \"", ErrIncompleteValue},
+ 		{`"\"`, "0: \": \"\\\"", ErrIncompleteValue},
+ 		{`"\a"`, "0: \": \"", ErrIncompleteValue},
  		{`"\uabc"`, "0: \": \"\\uabc", ErrIncompleteHexEsc},
- 		{"\"\x00\"", "0: \": \"", ErrUnclosedString},
+ 		{"\"\x00\"", "0: \": \"", ErrIncompleteValue},
  
  		{"0", "0: 1: 0", nil},
  		{"1", "0: 1: 1", nil},


@@ 54,7 55,7 @@ {"7", "0: 1: 7", nil},
  		{"8", "0: 1: 8", nil},
  		{"9", "0: 1: 9", nil},
- 		{"00", "0: 1: 0\n1: 1: 0\n", nil}, // TODO: would it be better to error?
+ 		{"00", "0: 1: 0\n1: 1: 0\n", nil},
  		{"-0", "0: 1: -0", nil},
  		{"-1", "0: 1: -1", nil},
  		{"-2", "0: 1: -2", nil},


@@ 65,12 66,13 @@ {"-7", "0: 1: -7", nil},
  		{"-8", "0: 1: -8", nil},
  		{"-9", "0: 1: -9", nil},
- 		{"-01", "0: 1: -0\n2: 1: 1", nil}, // TODO: same
+ 		{"-01", "0: 1: -0\n2: 1: 1", nil},
  		{"1234567890", "0: 1: 1234567890", nil},
  		{"-1234567890", "0: 1: -1234567890", nil},
  		{"1.0123456789", "0: 1: 1.0123456789", nil},
- 		{"1.", "0: 1: 1.", ErrIncompleteNumber},
- 		{"-", "0: 1: -", ErrIncompleteNumber},
+ 		{"1.", "0: 1: 1.", ErrIncompleteValue},
+ 		{"-", "0: 1: -", ErrIncompleteValue},
+ 		{"-1e", "0: 1: -1e", ErrIncompleteValue},
  		{"0.123456789", "0: 1: 0.123456789", nil},
  		{"-0.123456789", "0: 1: -0.123456789", nil},
  


@@ 98,6 100,71 @@   		{"{}", "0: {: {\n1: }: }", nil},
  		{`{"a" : 1}`, "0: {: {\n1: \": \"a\"\n7: 1: 1\n8: }: }", nil},
+ 		{` {"a" : 1,"b":true , "c" : "d" } `, `
+ 1: {: {
+ 2: ": "a"
+ 8: 1: 1
+ 10: ": "b"
+ 14: t: true
+ 21: ": "c"
+ 27: ": "d"
+ 31: }: }
+ `, nil},
+ 		{`{"a": {"b": {"c": "d", "e": 1}, "f": null}}`, `
+ 0: {: {
+ 1: ": "a"
+ 6: {: {
+ 7: ": "b"
+ 12: {: {
+ 13: ": "c"
+ 18: ": "d"
+ 23: ": "e"
+ 28: 1: 1
+ 29: }: }
+ 32: ": "f"
+ 37: n: null
+ 41: }: }
+ 42: }: }
+ `, nil},
+ 
+ 		{"[]", "0: [: [\n1: ]: ]", nil},
+ 		{"[true]", "0: [: [\n1: t: true\n5: ]: ]", nil},
+ 		{"[true,1 , false ]", `
+ 0: [: [
+ 1: t: true
+ 6: 1: 1
+ 10: f: false
+ 16: ]: ]
+ `, nil},
+ 
+ 		{`[1.02 , "a\b\"" , [ null, [] ], false]`, `
+ 0: [: [
+ 1: 1: 1.02
+ 8: ": "a\b\""
+ 18: [: [
+ 20: n: null
+ 26: [: [
+ 27: ]: ]
+ 29: ]: ]
+ 32: f: false
+ 37: ]: ]
+ `, nil},
+ 
+ 		{`[{}]`, `
+ 0: [: [
+ 1: {: {
+ 2: }: }
+ 3: ]: ]
+ `, nil},
+ 		{`{"": []}`, `
+ 0: {: {
+ 1: ": ""
+ 5: [: [
+ 6: ]: ]
+ 7: }: }
+ `, nil},
+ 
+ 		{"{", "0: {: {", nil},
  	}
  
  	for _, c := range cases {


@@ 115,14 182,14 @@ },
  			}
  			err := p.parse()
+ 			require.Equal(t, strings.TrimSpace(c.out), strings.TrimSpace(buf.String()))
+ 
  			if c.err != nil {
  				require.Error(t, err)
  				require.Equal(t, c.err, err)
  			} else {
  				require.NoError(t, err)
  			}
- 
- 			require.Equal(t, strings.TrimSpace(c.out), strings.TrimSpace(buf.String()))
  		})
  	}
  }