~mrms/jsonum

4bab4740e693666a7057ad5764335b606ef26840 — Marek Maškarinec 1 year, 1 month ago
Init commit
4 files changed, 294 insertions(+), 0 deletions(-)

A .build.yml
A README.md
A json.um
A pak.json
A  => .build.yml +27 -0
@@ 1,27 @@
image: debian/bullseye
packages:
- python3
- python3-pip
sources:
- https://git.sr.ht/~mrms/jsonum
- https://git.sr.ht/~mrms/pak
environment:
  project: jsonum
secrets:
- 9fb61155-1fd0-4a38-ac48-fc2557f98065
shell: false
tasks:
- setup: |
    cd pak
    python3 -m pip install -r requirements.txt
    ./setup.py install --user
- build: |
    cd jsonum
    python3 -m pak update
    python3 -m pak build
- deploy: |
    cd jsonum
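    # set +x keeps the upload token out of the build log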
    set +x
    python3 -m pak upload -t `cat ../.secret` pak.tar
    python3 -m pak upload -t `cat ../.secret` pak.json
    set -x

A  => README.md +7 -0
@@ 1,7 @@
# json

A JSON decoder library for the Umka language.

## license

Unlicense/MIT
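
## usage

A minimal sketch of decoding a document and reading values back out. The
explicit casts from `any` assume Umka's checked interface-to-concrete
conversions; adjust them to your Umka version if needed:

```
import (
	"json.um"
)

fn main() {
	// parse returns an `any` holding either a map[str]any or a []any
	data := json.parse("{\"name\": \"jsonum\", \"tags\": [\"json\", \"umka\"]}")

	obj := map[str]any(data)
	printf("name: %s\n", str(obj["name"]))

	tags := []any(obj["tags"])
	for i := 0; i < len(tags); i++ {
		printf("tag: %s\n", str(tags[i]))
	}
}
```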

A  => json.um +247 -0
@@ 1,247 @@

import (
	"std.um" // change me for your std.um location
)

// token kinds; the values after tok_null are implicitly enumerated from 0
const (
	tok_null = 0
	tok_opener
	tok_closer
	tok_colon
	tok_str
	tok_int
	tok_real
	tok_const
	tok_separator
	tok_lopener
	tok_lcloser
)

type lexer = struct {
	inp: str    // input text
	len: int    // length of inp
	pos: int    // current offset into inp
	lineno: int // current line number, used in error messages
}

type token = struct {
	t: int     // one of the tok_* kinds
	pos: int   // offset at which the token was read
	value: str // literal text of the token
}

// get returns the next character and advances, or '\0' at the end of input
fn (l: ^lexer) get(): char {
	if l.pos >= l.len { return '\0' }
	l.pos++
	return l.inp[l.pos-1]
}

// lex_str reads a double-quoted string, growing the output buffer as needed.
// Escape sequences are kept verbatim rather than decoded.
fn (l: ^lexer) lex_str(): str {
	const STEP = 64
	out := make([]char, STEP)
	buflen := STEP
	start := l.pos
	for true {
		c := l.inp[l.pos]
		l.pos++

		// stop at a closing quote that is not escaped by a backslash
		if l.pos > 1 && c == '"' && l.inp[l.pos-2] != '\\' {
			break
		}

		// grow the buffer before writing past its end
		if l.pos-start >= buflen {
			out = append(out, make([]char, STEP * 4))
			buflen += STEP * 4
		}

		out[l.pos-start-1] = c
	}
	return out
}

// is_num reports whether inp can appear in a number literal ('.' included)
fn is_num(inp: char): bool {
	return ((inp >= '0' && inp <= '9') || inp == '.')
}

// lex_num reads the rest of a number literal; the second result
// reports whether a decimal point was seen
fn (l: ^lexer) lex_num(): (str, bool) {
	out := ""
	is_real := false

	for true {
		c := l.get()
		if c == '.' { is_real = true }
		if !is_num(c) { break }
		out += c
	}

	return out, is_real
}

// lex_space reads a bare word (true, false, null) up to the next delimiter
fn (l: ^lexer) lex_space(): str {
	out := ""
	l.pos--
	c := l.get()
	for c != ' ' && c != '}' && c != ']' && c != '\n' && c != ',' && c != '\t' {
		out += c
		c = l.get()
	}
	l.pos--
	return out
}

// lex_next skips whitespace and returns the next token;
// the second result is false once the input is exhausted
fn (l: ^lexer) lex_next(): (token, bool) {
	for l.pos < l.len && (l.inp[l.pos] == ' ' || l.inp[l.pos] == '\n' || l.inp[l.pos] == '\t') {
		if l.inp[l.pos] == '\n' {
			l.lineno++
		}
		l.pos++
	}

	switch l.get() {
	case '{':
		return token{tok_opener, l.pos, "{"}, true
	case '}':
		return token{tok_closer, l.pos, "}"}, true
	case '[':
		return token{tok_lopener, l.pos, "["}, true
	case ']':
		return token{tok_lcloser, l.pos, "]"}, true
	case '"': // are ' or ` strings allowed?
		return token{tok_str, l.pos, l.lex_str()}, true
	case '\0':
		return token{}, false
	case ':':
		return token{tok_colon, l.pos, str(l.inp[l.pos-1])}, true
	case ',':
		return token{tok_separator, l.pos, str(l.inp[l.pos-1])}, true
	default:
		if l.inp[l.pos-1] == '-' || is_num(l.inp[l.pos-1]) {
			first := l.inp[l.pos-1]
			val, is_real := l.lex_num()
			t := tok_int
			if is_real { t = tok_real }
			l.pos--
			return token{t, l.pos, first + val}, true
		}

		val := l.lex_space()
		return token{tok_const, l.pos, val}, true
	}

	return token{}, false
}

// parser_error reports msg with the line number and byte offset of the error
fn (l: ^lexer) parser_error(msg: str) {
	printf("error %d:%d: %s\n", l.lineno, l.pos, msg)
}

// forward declarations: parse_val, parse_object and parse_array are mutually recursive
fn (l: ^lexer) parse_object(): map[str]any
fn (l: ^lexer) parse_array(): []any

// parse_val converts the token t (and any nested structure) into a value
fn (l: ^lexer) parse_val(t: token): any {
	switch (t.t) {
	case tok_str:
		return t.value
	case tok_int:
		return std.atoi(t.value)
	case tok_real:
		return std.atof(t.value)
	case tok_opener:
		return l.parse_object()
	case tok_lopener:
		return l.parse_array()
	case tok_const:
		if t.value == "true" {
			return true
		} else if t.value == "false" {
			return false
		} else if t.value == "null" {
			return null
		} else {
			l.parser_error("unknonw constant")
		}
	default:
		l.parser_error("unsupported json feature")
	}

	return null
}

// parse_object reads key/value pairs after an opening brace, up to the closing one
fn (l: ^lexer) parse_object(): map[str]any {
	var key: str
	var val: any
	var out: map[str]any
	
	// read one key/value pair per iteration until the closing brace
	t, stay := l.lex_next()
	for stay && t.t != tok_closer {
		if t.t == tok_str {
			next, stay := l.lex_next()
			if next.t != tok_colon {
				l.parser_error("missing colon")
				break
			}

			key = t.value
			next, stay = l.lex_next()
			val = l.parse_val(next)
			next, stay = l.lex_next()

			if stay && next.t != tok_separator && next.t != tok_closer {
				l.parser_error("missing comma.")
			}

			out[key] = val
			if next.t == tok_closer {
				break
			}
		}
		t, stay = l.lex_next()
	}

	return out
}

// parse_array reads comma-separated values up to the closing bracket
fn (l: ^lexer) parse_array(): []any {
	out := []any{}

	stay := true
	t := token{}
	for stay && t.t != tok_lcloser {
		t, stay = l.lex_next()
		if t.t == tok_lcloser {
			break
		}

		out = append(out, l.parse_val(t))
		t, stay = l.lex_next()

		if stay && (t.t != tok_separator && t.t != tok_lcloser) {
			l.parser_error("array elements are not separated correctly")
			break
		}
	}

	return out
}

//~~fn parse
// parses the JSON document in inp and returns either a map[str]any or a []any
fn parse*(inp: str): any {
//~~
	l := lexer{inp, len(inp), 0, 1}

	t, end := l.lex_next()
	var out: any

	switch (t.t) {
	case tok_opener:
		out = l.parse_object()
	case tok_lopener:
		out = l.parse_array()
	default:
		l.parser_error("top level type can only be an object or an array")
	}

	return out
}
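
// Example (illustrative; the casts from `any` assume Umka's checked
// interface-to-concrete conversions):
//
//	data := parse("[1, 2.5, \"pi\"]")
//	arr := []any(data)
//	printf("first: %d\n", int(arr[0]))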

A  => pak.json +13 -0
@@ 1,13 @@
{
    "name": "json",
    "version": "v0.1.0",
    "author": "Marek Maškarinec <marek@mrms.cz>",
    "license": "Unlicense/MIT",
    "description": "JSON decoder library",
    "readme": "README.md",
    "link": "",
    "dependencies": [],
    "include": ["json.um"],
    "run_posix": "./pak/umka/linux/umka json.um",
    "run_windows": ".\\pak\\umka\\windows\\umka.exe json.um"
}