~mna/zerojson

45f0f1c7c55e39c0b55c1dfe315f422fb0821848 — Martin Angers 1 year, 2 months ago
initial commit
2 files changed, 182 insertions(+), 0 deletions(-)

A zerojson.go
A zerojson_test.go
A  => zerojson.go +137 -0
@@ 1,137 @@
package zerojson

import "errors"

// JSON supports 7 different values:
//
// - Object, starting with '{'
// - Array, starting with '['
// - String, starting with '"'
// - Number, starting with '-' or [0-9]
// - True, starting with 't'
// - False, starting with 'f'
// - Null, starting with 'n'
//
// Of those, Object and Array can nest and as such require a stack.
// However, since String and Number can be arbitrarily long, when tokenizing
// with a limited buffer, those values may be broken down in multiple chunks
// too.
//
// Also, when processing an Object, the parser must keep track of whether
// it is processing a key or a value (both may be strings).

const (
	// staticStackSize is the number of uint64 words embedded directly in
	// a stack value. Each word records 64 nesting levels, so the first
	// staticStackSize*64 levels need no allocation.
	staticStackSize = 4

	trueTrail  = "rue"
	falseTrail = "alse"
	nullTrail  = "ull"

	// Code point 00 (NUL) is invalid in JSON, use it as eof marker
	eof = '\x00'
)

// stack records, for every currently open nesting level, whether that
// level is an Object or an Array. One bit is stored per level: 1 for
// Object ('{'), 0 for Array ('['). The zero value is an empty stack.
type stack struct {
	// depth is the number of levels currently on the stack.
	depth int

	// each uint64 can store 64 levels deep (1 bit is sufficient per level)
	static [staticStackSize]uint64

	// for very deeply-nested JSON, resort to allocation
	dynamic []uint64
}

// push records a new nesting level on top of the stack.
//
// It is expected that v is either '{' or '['. '{' is stored as an Object
// level; any other value is stored as an Array level.
func (s *stack) push(v byte) {
	// depth 0 means nothing pushed on the stack, so to find the bit
	// index we need to get depth *before* incrementing it, and then
	// modulo 64 as each uint64 has 64 bits.
	wordIndex := s.depth / 64
	mask := uint64(1) << uint(s.depth%64)
	s.depth++

	var word *uint64
	if wordIndex < staticStackSize {
		word = &s.static[wordIndex]
	} else {
		wordIndex -= staticStackSize
		if wordIndex >= len(s.dynamic) {
			s.dynamic = append(s.dynamic, 0)
		}
		word = &s.dynamic[wordIndex]
	}

	// The slot may still hold the bit of a level that was pushed and
	// popped earlier at this same depth, so it must be written both
	// ways: set for Object, explicitly cleared for Array. Only OR-ing
	// the bit in would leave a stale Object marker behind.
	if v == '{' {
		*word |= mask
	} else {
		*word &^= mask
	}
}

// pop removes the top nesting level and returns '{' if it was an Object
// or '[' if it was an Array. If the stack is empty, pop returns 0 and
// leaves the stack unchanged.
func (s *stack) pop() byte {
	if s.depth <= 0 {
		// Nothing to pop: do not let depth go negative, and do not
		// report a level that was never opened.
		return 0
	}

	s.depth--
	wordIndex := s.depth / 64
	mask := uint64(1) << uint(s.depth%64)

	var word uint64
	if wordIndex < staticStackSize {
		word = s.static[wordIndex]
	} else {
		word = s.dynamic[wordIndex-staticStackSize]
	}
	if word&mask != 0 {
		return '{'
	}
	return '['
}

// parser holds the state of a scan over an in-memory JSON input.
type parser struct {
	// input is the buffer that advance reads bytes from.
	input []byte
	// cur is the current byte; advance sets it to input[pos] after
	// moving forward, or to eof when there is no next byte.
	cur   byte
	// pos is the index into input; advance increments it only while a
	// next byte exists.
	pos   int

	// stack is the nesting stack.
	// NOTE(review): presumably tracks open objects/arrays during
	// scanning; no method in this file uses it yet — confirm.
	stack  stack
	// NOTE(review): tok and keyVal are not read or written by any method
	// in this file yet; the descriptions below state the intended use.
	tok    byte // current token, '{', '[', '"', '1', 't', 'f', 'n'
	keyVal byte // when in an object, indicates if we're on key (':') or value (',')
}

// scan skips any leading whitespace and then classifies the current byte
// by the kind of JSON value it would start. The classification is a
// placeholder: no branch does any work yet, and scan always returns a
// "not implemented" error.
func (p *parser) scan() error {
	p.skipWhitespace()

	switch c := p.cur; {
	case c == '{':
		// object
	case c == '[':
		// array
	case c == '"':
		// string
	case c == '-' || ('0' <= c && c <= '9'):
		// number
	case c == 't':
		// true
	case c == 'f':
		// false
	case c == 'n':
		// null
	default:
		// invalid character
	}

	return errors.New("not implemented")
}

// advance moves to the next byte of input and stores it in cur. When the
// parser is already on the last byte (or input is empty), pos is left
// where it is and cur is set to the eof marker instead.
func (p *parser) advance() {
	if last := len(p.input) - 1; p.pos < last {
		p.pos++
		p.cur = p.input[p.pos]
		return
	}
	p.cur = eof
}

// skipWhitespace advances for as long as the current byte is a tab,
// carriage return, line feed or space, and stops on the first byte that
// is none of those.
func (p *parser) skipWhitespace() {
	for {
		switch p.cur {
		case '\t', '\r', '\n', ' ':
			p.advance()
		default:
			return
		}
	}
}

A  => zerojson_test.go +45 -0
@@ 1,45 @@
package zerojson

import (
	"math/rand"
	"testing"
	"time"
)

// TestStack pushes a random sequence of object/array markers that is long
// enough to spill out of the static words into the dynamic slice, then
// pops them and checks they come back in reverse order. The whole cycle
// runs twice on the same stack value so that pushes also happen after
// pops. The seed is logged so a failing run can be reproduced.
func TestStack(t *testing.T) {
	seed := time.Now().UnixNano()
	rng := rand.New(rand.NewSource(seed))

	// always exceed the capacity of the static words by at least one level
	total := staticStackSize*64 + rng.Intn(1000) + 1

	// build the sequence of markers: index 1 is Object, index 0 is Array
	const kinds = "[{"
	seq := make([]byte, total)
	for i := 0; i < total; i++ {
		seq[i] = kinds[rng.Intn(2)]
	}
	t.Logf("seed=%d\nmax=%d\nvalues=%s", seed, total, string(seq))

	var s stack

	for round := 0; round < 2; round++ {
		for i := range seq {
			s.push(seq[i])
		}

		// pops must yield the sequence back to front
		for i := total; i > 0; i-- {
			want := seq[i-1]
			if got := s.pop(); got != want {
				t.Fatalf("at %d: want %x, got %x", i-1, want, got)
			}
		}

		if s.depth != 0 {
			t.Fatalf("depth should be 0, is %d", s.depth)
		}
	}
}