45f0f1c7c55e39c0b55c1dfe315f422fb0821848 — Martin Angers 6 months ago
initial commit
2 files changed, 182 insertions(+), 0 deletions(-)

A zerojson.go
A zerojson_test.go
A  => zerojson.go +137 -0
@@ 1,137 @@
+package zerojson
+
+import "errors"
+
+// JSON supports 7 different values:
+//
+// - Object, starting with '{'
+// - Array, starting with '['
+// - String, starting with '"'
+// - Number, starting with '-' or [0-9]
+// - True, starting with 't'
+// - False, starting with 'f'
+// - Null, starting with 'n'
+//
+// Of those, Object and Array can nest and as such require a stack.
+// However, since String and Number can be arbitrarily long, when tokenizing
+// with a limited buffer, those values may be broken down in multiple chunks
+// too.
+//
+// Also, when processing an Object, the parser must keep track of whether
+// it is processing a key or a value (both may be strings).
+
+// Constants shared by the stack and the parser.
+const (
+	// staticStackSize is the number of uint64 words embedded directly in
+	// the stack struct. At 1 bit per nesting level, that covers
+	// staticStackSize*64 levels before the dynamic slice is needed.
+	staticStackSize = 4
+
+	// Remaining bytes of the literals true, false and null once their
+	// first byte ('t', 'f' or 'n') has been consumed.
+	// NOTE(review): not referenced by the visible code yet; presumably
+	// meant for matching the rest of each literal. Confirm once scan is
+	// implemented.
+	trueTrail  = "rue"
+	falseTrail = "alse"
+	nullTrail  = "ull"
+
+	// Code point 00 (NUL) is invalid in JSON, use it as eof marker
+	eof = '\x00'
+)
+
+// stack records, for every open container, whether it is an Object or an
+// Array, using a single bit per nesting level (1 for Object, 0 for Array,
+// as set by push). The zero value is an empty stack.
+type stack struct {
+	// depth is the number of levels currently pushed; 0 means empty.
+	depth int
+
+	// each uint64 can store 64 levels deep (1 bit is sufficient per level)
+	static [staticStackSize]uint64
+
+	// for very deeply-nested JSON, resort to allocation
+	dynamic []uint64
+}
+
+// push records a newly opened container on the stack. It is expected that
+// v is either '{' (Object) or '[' (Array). Behaviour is undefined if a
+// different value is provided.
+//
+// The first staticStackSize*64 levels are stored in the embedded static
+// words; deeper levels spill over into the dynamic slice, which grows by
+// one word at a time.
+func (s *stack) push(v byte) {
+	// depth 0 means nothing pushed on the stack, so to find the bit
+	// index we need to get depth *before* incrementing it, and then
+	// modulo 64 as each uint64 has 64 bits.
+	wordIndex := s.depth / 64
+	mask := uint64(1) << uint(s.depth%64)
+	s.depth++
+
+	// select the word holding the bit for this level
+	var word *uint64
+	if wordIndex < staticStackSize {
+		word = &s.static[wordIndex]
+	} else {
+		wordIndex -= staticStackSize
+		if wordIndex >= len(s.dynamic) {
+			s.dynamic = append(s.dynamic, 0)
+		}
+		word = &s.dynamic[wordIndex]
+	}
+
+	// The slot may still hold the bit of a level that was pushed and
+	// popped earlier, so always clear it before setting it: 1 for
+	// Object, 0 for Array. Only OR-ing the new bit in would turn an
+	// Array pushed over a former Object into an Object.
+	*word &^= mask
+	if v == '{' {
+		*word |= mask
+	}
+}
+
+// pop removes the innermost level from the stack and returns the byte
+// that opened it: '{' for an Object, '[' for an Array.
+//
+// If the stack is empty, pop returns 0 and leaves the stack untouched
+// instead of driving depth negative, which would corrupt the bit index
+// computed by later calls.
+func (s *stack) pop() byte {
+	if s.depth <= 0 {
+		return 0
+	}
+
+	// after decrementing, depth is the index of the level being removed
+	s.depth--
+	wordIndex := s.depth / 64
+	bitIndex := s.depth % 64
+
+	var word uint64
+	if wordIndex < staticStackSize {
+		word = s.static[wordIndex]
+	} else {
+		word = s.dynamic[wordIndex-staticStackSize]
+	}
+	if word&(1<<uint(bitIndex)) == 0 {
+		return '['
+	}
+	return '{'
+}
+
+// parser holds the state used to scan a JSON input held in memory.
+//
+// input is the buffer being scanned, pos is an index into it and cur is
+// the byte under examination: advance either moves pos forward and loads
+// input[pos] into cur, or sets cur to eof when there is no next byte.
+// stack is the Object/Array nesting stack; the visible methods do not
+// use it yet.
+type parser struct {
+	input []byte
+	cur   byte
+	pos   int
+
+	stack  stack
+	tok    byte // current token, '{', '[', '"', '1', 't', 'f', 'n'
+	keyVal byte // when in an object, indicates if we're on key (':') or value (',')
+}
+
+// scan skips leading whitespace and then dispatches on the current byte
+// to identify which kind of JSON value starts there. The branches are
+// placeholders for now, so it always reports "not implemented".
+func (p *parser) scan() error {
+	p.skipWhitespace()
+
+	switch c := p.cur; {
+	case c == '{':
+		// start of an object
+	case c == '[':
+		// start of an array
+	case c == '"':
+		// start of a string
+	case c == '-' || ('0' <= c && c <= '9'):
+		// start of a number
+	case c == 't':
+		// literal true
+	case c == 'f':
+		// literal false
+	case c == 'n':
+		// literal null
+	default:
+		// anything else cannot start a JSON value
+	}
+	return errors.New("not implemented")
+}
+
+// advance moves to the next byte of input and stores it in cur. When
+// there is no next byte (the parser is on the last byte, or input is
+// empty), pos is left unchanged and cur becomes eof.
+func (p *parser) advance() {
+	if next := p.pos + 1; next < len(p.input) {
+		p.pos = next
+		p.cur = p.input[next]
+		return
+	}
+	p.cur = eof
+}
+
+// skipWhitespace advances past any run of tab, carriage return, line
+// feed or space, stopping on the first byte that is none of those.
+func (p *parser) skipWhitespace() {
+	for {
+		switch p.cur {
+		case '\t', '\r', '\n', ' ':
+			p.advance()
+		default:
+			return
+		}
+	}
+}

A  => zerojson_test.go +45 -0
@@ 1,45 @@
+package zerojson
+
+import (
+	"math/rand"
+	"testing"
+	"time"
+)
+
+// TestStack pushes a random sequence of '{' and '[' that is deep enough
+// to spill past the static words, then pops it back and checks that each
+// value comes out in reverse order. The seed is logged so that a failing
+// run can be reproduced.
+func TestStack(t *testing.T) {
+	seed := time.Now().UnixNano()
+	rnd := rand.New(rand.NewSource(seed))
+
+	// always go past the static capacity so the dynamic slice is used
+	n := staticStackSize*64 + rnd.Intn(1000) + 1
+
+	// build the sequence of openers to push
+	vals := make([]byte, n)
+	for i := range vals {
+		vals[i] = '['
+		if rnd.Intn(2) == 1 {
+			vals[i] = '{'
+		}
+	}
+	t.Logf("seed=%d\nmax=%d\nvalues=%s", seed, n, string(vals))
+
+	var s stack
+
+	// run two rounds so that pushes also happen after pops
+	for round := 0; round < 2; round++ {
+		for _, v := range vals {
+			s.push(v)
+		}
+
+		for i := n - 1; i >= 0; i-- {
+			if got := s.pop(); got != vals[i] {
+				t.Fatalf("at %d: want %x, got %x", i, vals[i], got)
+			}
+		}
+		if s.depth != 0 {
+			t.Fatalf("depth should be 0, is %d", s.depth)
+		}
+	}
+}