~mna/zerojson

780b8ea647d6b91e344b6dd20401a7f05b7f6317 — Martin Angers 1 year, 14 days ago 2f03901
scan strings, start escapes
3 files changed, 118 insertions(+), 20 deletions(-)

M zerojson.go
A zerojson_bench_test.go
M zerojson_test.go
M zerojson.go => zerojson.go +75 -20
@@ 1,7 1,6 @@
package zerojson

import (
	"fmt"
	"io"
)



@@ 13,9 12,21 @@ func (e errorString) Error() string {
}

const (
	// ErrIncompleteLiteral is the error representing an incomplete true, false or
	// null literal.
	ErrIncompleteLiteral errorString = "incomplete literal"
	// ErrIncompleteToken is the error representing an incomplete true, false or
	// null token.
	ErrIncompleteToken errorString = "incomplete token"

	// ErrUnclosedString is the error representing an unclosed string value, i.e.
	// when the opening quote was encountered, but EOF was reached before the closing
	// quote, or an unescaped code point that must be escaped is found.
	ErrUnclosedString errorString = "unclosed string"

	// ErrInvalidCodePoint is the error representing an invalid code point. This is
	// any code point disallowed in the JSON grammar (e.g. "!" outside a string value),
	// as well as an unescaped code point that is only allowed in escaped form within
	// a string (quotation mark U+0022, reverse solidus U+005C, and the control characters
	// U+0000 to U+001F).
	ErrInvalidCodePoint errorString = "invalid code point"
)

// JSON supports 7 different values:


@@ 42,9 53,6 @@ const (
	trueTrail  = "rue"
	falseTrail = "alse"
	nullTrail  = "ull"

	// Code point 00 (NUL) is invalid in JSON, use it as eof marker
	eof = '\x00'
)

type stack struct {


@@ 134,7 142,7 @@ type parser struct {
	// 't' : true (v is the full literal)
	// 'f' : false (v is the full literal)
	// 'n' : null (v is the full literal)
	// '\x00' : unknown or eof, only provided with a non-nil err
	// other : unknown or eof, only provided with a non-nil err
	//
	// The value v is a slice into the original input, it should not be
	// modified. If err is not nil, typ represents the type that it should


@@ 153,7 161,7 @@ func (p *parser) parse() error {
	p.advance()

loop:
	for p.cur != eof {
	for {
		p.skipWhitespace()

		var (


@@ 171,23 179,24 @@ loop:
		case '[':
			p.stack.push(typ)
		case '"':
			//err = p.scanString()
			err = p.scanString()
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			//first := typ
			typ = '1'
			//err = p.scanNumber(first)
		case 't':
			err = p.scanLiteral(trueTrail)
			err = p.scanToken(trueTrail)
		case 'f':
			err = p.scanLiteral(falseTrail)
			err = p.scanToken(falseTrail)
		case 'n':
			err = p.scanLiteral(nullTrail)
		case eof:
			// TODO: if a token was required, error
			break loop
			err = p.scanToken(nullTrail)

		default:
			err = fmt.Errorf("invalid character: %#U", typ)
			if p.eof() {
				// TODO: if a token was required, error
				break loop
			}
			err = ErrInvalidCodePoint
		}

		if e := p.emit(start, typ, p.input[start:p.pos], err); e != nil {


@@ 197,26 206,72 @@ loop:
	return p.emit(p.pos, p.cur, nil, io.EOF)
}

func (p *parser) scanLiteral(trail string) error {
// scanString scans the content of a string value, advancing until p.cur is
// the closing quotation mark. On success it returns nil and leaves p.cur on
// that quotation mark; it does not move past it.
//
// It returns ErrUnclosedString if the end of the input is reached before a
// closing quotation mark, or if an unescaped control character (U+0000 to
// U+001F) is found, in which case the parser is left on that character.
// Any error returned by scanEscape is returned unchanged.
//
// NOTE(review): presumably the caller has already moved past the opening
// quotation mark before calling this - confirm against parse.
func (p *parser) scanString() error {
	for p.cur != '"' {
		switch cur := p.cur; {
		case cur == '\\':
			// move past the reverse solidus, then validate what follows it
			p.advance()
			if err := p.scanEscape(); err != nil {
				return err
			}
			// scanEscape positions the parser itself, so the byte now in
			// p.cur has not been checked by this loop yet. Re-run the loop
			// instead of falling through to the advance below, which would
			// skip that byte (e.g. the closing quote in "\n", or a control
			// character right after an escape).
			continue
		case p.eof():
			// must be tested before the control character case: at the end
			// of the input p.cur is 0, which is also <= 0x1F.
			return ErrUnclosedString
		case cur <= 0x1F:
			// do not advance, the control character will be considered outside
			// the string and will generate a distinct ErrInvalidCodePoint.
			return ErrUnclosedString
		}
		p.advance()
	}
	return nil
}

// scanEscape validates the escape character in p.cur, i.e. the byte that
// follows a reverse solidus inside a string (scanString calls it right after
// advancing past the reverse solidus).
//
// For the single-character escapes it advances past the escape character and
// returns nil. For 'u' it currently returns nil without advancing (the hex
// digits are not scanned yet). Only the lowercase 'u' is accepted: the JSON
// grammar has no uppercase "\U" escape. For any other byte it moves the
// parser back by one byte and returns ErrUnclosedString.
func (p *parser) scanEscape() error {
	switch p.cur {
	case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
		p.advance()
		return nil
	case 'u':
		// TODO: scan 4 hex digits
		return nil
	default:
		// invalid escape, move back to previous byte and report unclosed
		// string, then treat the single backslash as an invalid code point.
		p.back()
		return ErrUnclosedString
	}
}

// scanToken checks that the upcoming bytes match trail, the bytes expected
// after the first byte of a true, false or null token (e.g. "rue" for true).
// It advances past every matching byte and returns nil once all of trail has
// been matched. On the first mismatch it returns ErrIncompleteToken and
// leaves the parser on the byte that did not match.
func (p *parser) scanToken(trail string) error {
	for _, b := range []byte(trail) {
		if p.cur != b {
			return ErrIncompleteToken
		}
		p.advance()
	}
	return nil
}

// eof reports whether the parser is at the end of the input: the position is
// at or past the length of the input and the current byte is 0. A 0 byte found
// at a position inside the input is not the end of the input.
func (p *parser) eof() bool {
	if p.cur != 0 {
		return false
	}
	return len(p.input) <= p.pos
}

// advance moves the parser to the next byte of the input and loads it in
// p.cur. When the new position is at or past the end of the input, p.cur is
// set to 0; the position is still incremented on every call.
func (p *parser) advance() {
	p.pos++
	if p.pos >= len(p.input) {
		// nothing left to read, 0 is what eof looks for
		p.cur = 0
		return
	}
	p.cur = p.input[p.pos]
}

// back moves the parser one byte backwards and reloads p.cur from the new
// position. The new position must be a valid index in the input, otherwise
// the indexing panics.
func (p *parser) back() {
	p.pos -= 1
	prev := p.input[p.pos]
	p.cur = prev
}

func (p *parser) skipWhitespace() {
	for p.cur == '\t' || p.cur == '\r' || p.cur == '\n' || p.cur == ' ' {
		p.advance()

A zerojson_bench_test.go => zerojson_bench_test.go +38 -0
@@ 0,0 1,38 @@
package zerojson

import (
	"fmt"
	"io"
	"testing"
)

// BenchmarkTrue benchmarks parsing of the "true" input.
func BenchmarkTrue(b *testing.B) {
	const input = "true"
	benchmarkInput(b, input)
}

// BenchmarkFalse benchmarks parsing of the "false" input.
func BenchmarkFalse(b *testing.B) {
	const input = "false"
	benchmarkInput(b, input)
}

// BenchmarkNull benchmarks parsing of the "null" input.
func BenchmarkNull(b *testing.B) {
	const input = "null"
	benchmarkInput(b, input)
}

// benchmarkInput runs the parser b.N times over input, padded with a single
// space on each side, and fails the benchmark on the first error returned by
// parse. Allocations per operation are reported along with the timings.
func benchmarkInput(b *testing.B, input string) {
	p := parser{
		input: []byte(fmt.Sprintf(" %s ", input)),
		emit: func(offset int, typ byte, v []byte, err error) error {
			// io.EOF is how parse signals the end of the input, it is not
			// a failure; any other error is handed back to parse.
			if err == io.EOF {
				return nil
			}
			return err
		},
	}
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// NOTE(review): only the stack is reset between iterations; this
		// assumes parse resets the position itself - confirm, otherwise
		// every iteration after the first starts at the end of the input.
		p.stack = stack{}
		if err := p.parse(); err != nil {
			b.Fatal(err)
		}
	}
}

M zerojson_test.go => zerojson_test.go +5 -0
@@ 19,6 19,11 @@ func TestParser(t *testing.T) {
	}{
		{"", "", nil},
		{" \t\n ", "", nil},

		{" nul ", "1: n: nul", ErrIncompleteToken},
		{"t", "0: t: t", ErrIncompleteToken},
		{"\t\n fa", "3: f: fa", ErrIncompleteToken},

		{"null", "0: n: null", nil},
		{"true", "0: t: true", nil},
		{"false", "0: f: false", nil},