~cricket/zckt

4559919d58e1165bdc55ed5b9788e017b34b0374 — c piapiac a month ago ac62d86
split to parser and tokenizer files to deal with easier
4 files changed, 590 insertions(+), 546 deletions(-)

M README
A parser.zig
A tokenizer.zig
M zckt.zig
M README => README +3 -3
@@ 5,7 5,7 @@
    zig ckt parser

ABOUT
    this parser will parse a ckt file into to a stringhashmap
    this parser will parse a ckt file into a string hashmap
    ckt is a stupid simple file format for notating tables
    you can view more about it on https://sr.ht/~cricket/ckt



@@ 15,10 15,10 @@ USAGE

    that being said,
    ---
    const zckt = @import("zckt").Parser;
    const ckt = @import("zckt").parser.table_parser;

    // ...
    zkct.parse(data, allocator);
    ckt.parse(data, allocator);
    // returns a table of key/value pairs you can work with,
    // with values being either strings or tables.


A parser.zig => parser.zig +328 -0
@@ 0,0 1,328 @@
const std = @import("std");
const fmt = std.fmt;
const mem = std.mem;
const meta = std.meta;
const debug = std.debug;

const tknzr = @import("tokenizer.zig");
const Token = tknzr.Token;
const Tokenizer = tknzr.Tokenizer;

// the only failure ArrayList/allocPrint operations can raise
const MemError = error{OutOfMemory};

// ways a string token's contents can be malformed
const StringError = error{
    UnexpectedNewline,
    InvalidEscapeChar,
    InvalidHexEscape,
    InvalidUnicodeEscape,
};

/// Advance `i` past horizontal whitespace (space, tab, vertical tab,
/// form feed, carriage return) in `str`. Newlines are NOT skipped.
/// On return `i.*` indexes the first non-whitespace byte, or equals
/// `str.len` if the slice ends in whitespace.
fn skipWhitespace(str: []const u8, i: *usize) void {
    while (i.* < str.len) {
        const is_space = switch (str[i.*]) {
            ' ', '\t', 0x0B, 0x0C, '\r' => true,
            else => false,
        };
        if (!is_space) return;
        i.* += 1;
    }
}

// i'm not sure if there's a way to do this without allocations
/// Decode the raw text of a multiline string token (a run of lines each
/// introduced by '|') into its contents. Caller owns the returned slice.
fn parseMultilineString(str: []const u8, a: *mem.Allocator) MemError![]const u8 {
    const State = enum { root, line };

    var state = State.line;
    var buf = std.ArrayList(u8).init(a);
    defer buf.deinit();

    var i: usize = 0;
    debug.assert(str[i] == '|');
    i += 1; // skip initial pipe

    while (i < str.len) : (i += 1) {
        switch (state) {
            .root => {
                skipWhitespace(str, &i);
                // fix: the slice may end in whitespace, in which case
                // skipWhitespace leaves i == str.len and indexing below
                // would be out of bounds
                if (i >= str.len) return buf.toOwnedSlice();
                switch (str[i]) {
                    '|' => state = State.line,
                    else => return buf.toOwnedSlice(),
                }
            },
            .line => {
                const char = str[i];
                switch (char) {
                    '\\' => {
                        // a lone backslash at end-of-line suppresses the
                        // newline (soft line continuation)
                        const next_is_newline_or_eof =
                            if (i + 1 < str.len) str[i + 1] == '\n' else true;

                        if (next_is_newline_or_eof) {
                            // double backslash at end-of-line: the first
                            // one was already appended literally, so keep
                            // the newline
                            if (str[i - 1] == '\\') continue;
                            i += 1; // skip over newline
                            state = State.root;
                        } else {
                            try buf.append('\\');
                        }
                    },
                    '\n' => {
                        try buf.append('\n');
                        state = State.root;
                    },
                    else => try buf.append(char),
                }
            },
        }
    }

    // input ended while inside a line (the tokenizer returns slices that
    // run to EOF); return what was collected
    return buf.toOwnedSlice();
}

// ~~stolen~~ inspired by std/zig/string_literal.zig
// think about making this match json's string escapes to make CKT -> JSON easy peasy
// again, i'm not sure if there's a way to do this without allocations
/// Decode a quoted string token (the slice INCLUDES the opening quote)
/// into its contents, resolving backslash escapes. Caller owns the result.
/// Fixes: \uXXXX and \UXXXXXXXX now emit the UTF-8 encoding of the code
/// point instead of raw native-endian integer bytes, and unused error
/// captures are discarded with `_`.
fn parseQuotedString(
    str: []const u8,
    a: *mem.Allocator,
    comptime quote: u8,
) (MemError || StringError)![]const u8 {
    const State = enum { root, backslash };

    var state = State.root;
    var buf = std.ArrayList(u8).init(a);
    defer buf.deinit();

    var i: usize = 0;
    i += 1; // skip the opening quote

    while (i < str.len) : (i += 1) {
        const char = str[i];
        switch (state) {
            .root => switch (char) {
                '\n' => return error.UnexpectedNewline,
                '\\' => state = State.backslash,
                quote => {
                    i += 1;
                    return buf.toOwnedSlice();
                },
                else => {
                    try buf.append(char);
                },
            },
            .backslash => switch (char) {
                'f' => {
                    try buf.append(0x0C);
                    state = State.root;
                },
                'n' => {
                    try buf.append('\n');
                    state = State.root;
                },
                'r' => {
                    try buf.append('\r');
                    state = State.root;
                },
                't' => {
                    try buf.append('\t');
                    state = State.root;
                },
                'v' => {
                    try buf.append(0x0B);
                    state = State.root;
                },
                '\\', '"', '\'' => {
                    try buf.append(char);
                    state = State.root;
                },
                'x' => {
                    // \xNN: a single raw byte
                    if (str.len < i + 3) return error.InvalidHexEscape;
                    if (fmt.parseUnsigned(u8, str[i + 1 .. i + 3], 16)) |byte| {
                        try buf.append(byte);
                        state = State.root;
                        i += 2;
                    } else |_| {
                        return error.InvalidHexEscape;
                    }
                },
                'u' => {
                    // \uXXXX: BMP code point, appended as UTF-8
                    if (str.len < i + 5) return error.InvalidUnicodeEscape;
                    if (fmt.parseUnsigned(u16, str[i + 1 .. i + 5], 16)) |unic| {
                        var utf8_buf: [4]u8 = undefined;
                        const len = std.unicode.utf8Encode(unic, utf8_buf[0..]) catch
                            return error.InvalidUnicodeEscape;
                        try buf.appendSlice(utf8_buf[0..len]);
                        state = State.root;
                        i += 4;
                    } else |_| {
                        return error.InvalidUnicodeEscape;
                    }
                },
                'U' => {
                    // \UXXXXXXXX: any code point, appended as UTF-8
                    if (str.len < i + 9) return error.InvalidUnicodeEscape;
                    if (fmt.parseUnsigned(u32, str[i + 1 .. i + 9], 16)) |unic| {
                        if (unic > 0x10FFFF) return error.InvalidUnicodeEscape;
                        var utf8_buf: [4]u8 = undefined;
                        const len = std.unicode.utf8Encode(@intCast(u21, unic), utf8_buf[0..]) catch
                            return error.InvalidUnicodeEscape;
                        try buf.appendSlice(utf8_buf[0..len]);
                        state = State.root;
                        i += 8;
                    } else |_| {
                        return error.InvalidUnicodeEscape;
                    }
                },
                else => return error.InvalidEscapeChar,
            },
        }
    }

    // unclosed string
    // the tokenizer should've caught this
    return buf.toOwnedSlice();
}

/// Decode any string token into an owned, unescaped byte slice.
/// Caller frees the result with `a`.
fn parseString(tok: Token.String, a: *mem.Allocator) (MemError || StringError)![]const u8 {
    switch (tok) {
        .unquoted => |raw| {
            // no escapes to process; just copy
            return a.dupe(u8, raw);
        },
        .multiline => |raw| {
            return parseMultilineString(raw, a);
        },
        .quoted => |raw| {
            // the slice still carries its quote characters
            return switch (raw[0]) {
                '"' => parseQuotedString(raw, a, '"'),
                '\'' => parseQuotedString(raw, a, '\''),
                else => @panic("unknown quote?"),
            };
        },
    }
}

// parse into a hashmap
// (is there a better way to name this ?)
pub const table_parser = struct {
    /// Keys are heap-allocated byte slices owned by their table.
    pub const Key = []const u8;

    /// A parsed value: an owned string or a nested table.
    pub const Value = union(enum) {
        string: []const u8,
        table: Table,

        /// Free the string bytes, or recursively tear down the table.
        pub fn deinit(self: *Value, a: *mem.Allocator) void {
            switch (self.*) {
                .string => |str| a.free(str),
                .table => |*table| table.deinit(),
            }
        }

        /// The string payload, or error.NotAString for a table.
        pub fn asString(self: Value) error{NotAString}![]const u8 {
            switch (self) {
                .string => |str| return str,
                .table => return error.NotAString,
            }
        }

        /// The table payload, or error.NotATable for a string.
        pub fn asTable(self: Value) error{NotATable}!Table {
            switch (self) {
                .string => return error.NotATable,
                .table => |table| return table,
            }
        }

        // debug printing: strings verbatim, tables as a placeholder
        pub fn format(
            self: Value,
            comptime _: []const u8,
            _: fmt.FormatOptions,
            writer: anytype,
        ) !void {
            switch (self) {
                .string => |str| try writer.print("{s}", .{str}),
                .table => |table| try writer.print("[ ... ]", .{}),
            }
        }
    };

    /// Insertion-ordered, string-keyed map of parsed values.
    /// Owns its keys and values; release everything with `deinit`.
    pub const Table = struct {
        pub const Map = std.StringArrayHashMap(Value);

        map: Map,
        a: *mem.Allocator,

        pub fn init(a: *mem.Allocator) Table {
            return .{
                .map = Map.init(a),
                .a = a,
            };
        }

        /// Free every key and value, then the map itself.
        pub fn deinit(self: *Table) void {
            var iter = self.map.iterator();

            while (iter.next()) |entry| {
                self.a.free(entry.key);
                entry.value.deinit(self.a);
            }

            self.map.deinit();
        }

        pub fn get(self: Table, key: []const u8) ?Value {
            return self.map.get(key);
        }

        /// Convenience lookup; null when absent or not a string.
        pub fn getString(self: Table, key: []const u8) ?[]const u8 {
            const value = self.map.get(key) orelse return null;
            return value.asString() catch return null;
        }

        /// Convenience lookup; null when absent or not a table.
        pub fn getTable(self: Table, key: []const u8) ?Table {
            const value = self.map.get(key) orelse return null;
            return value.asTable() catch return null;
        }

        /// Insert `key` -> `value`, taking ownership of both. On
        /// overwrite, the map keeps its OLD key allocation, so the
        /// incoming `key` is freed along with the displaced value.
        pub fn put(self: *Table, key: []const u8, value: Value) !void {
            var result = try self.map.fetchPut(key, value);

            // free memory if needed
            if (result) |*entry| {
                self.a.free(key);
                entry.value.deinit(self.a);
            }
        }

        // debug printing: "[ k=v;k=v; ]"
        pub fn format(
            self: Table,
            comptime _: []const u8,
            _: fmt.FormatOptions,
            writer: anytype,
        ) !void {
            try writer.print("[ ", .{});
            var iter = self.map.iterator();
            while (iter.next()) |entry| try writer.print("{s}={s};", .{ entry.key, entry.value });
            try writer.print(" ]", .{});
        }
    };

    /// Consume tokens until the matching table_end (or EOF) and build a
    /// Table. Bare values without a `key =` prefix are auto-keyed
    /// "0", "1", ... in order of appearance within this table.
    /// NOTE(review): allocations made before an error are not freed —
    /// entries parsed so far leak on the error path.
    pub fn parseTable(
        tokens: *Tokenizer,
        a: *mem.Allocator,
    ) (MemError || StringError || Tokenizer.Error)!Table {
        var table = Table.init(a);
        var key: ?Key = null;
        var value: ?Value = null;
        var i: usize = 0;

        while (try tokens.next()) |token| {
            switch (token) {
                .table_end => return table,
                // NOTE(review): two consecutive keyeq tokens would
                // overwrite `key` without freeing the first allocation —
                // confirm the tokenizer cannot emit that sequence.
                .keyeq => |str| key = try parseString(str, a),
                .table_start => value = .{ .table = try parseTable(tokens, a) },
                .value => |str| value = .{ .string = try parseString(str, a) },
            }

            if (value) |val| {
                // no explicit key: synthesize a positional one
                const keystr = key orelse key: {
                    defer i += 1;
                    break :key try fmt.allocPrint(a, "{d}", .{i});
                };
                try table.put(keystr, val);
                value = null;
                key = null;
            }
        }

        // if no table end, return the table anyway;
        // any unclosed tables should've been caught by the tokenizer
        return table;
    }

    /// Parse a whole ckt document into a Table. Caller calls deinit.
    pub fn parse(data: []const u8, a: *mem.Allocator) !Table {
        var tokenizer = Tokenizer.init(data);
        return parseTable(&tokenizer, a);
    }
};

A tokenizer.zig => tokenizer.zig +249 -0
@@ 0,0 1,249 @@
const std = @import("std");
const debug = std.debug;
const fmt = std.fmt;
const mem = std.mem;

/// A single lexical token. String payloads are zero-copy slices of the
/// tokenizer's input buffer.
pub const Token = union(enum) {
    /// Raw string text, tagged by the syntax that produced it. Quoted
    /// slices still include their quote characters; multiline slices
    /// carry the raw '|'-prefixed lines. Decoding happens in the parser.
    pub const String = union(enum) {
        unquoted: []const u8,
        quoted: []const u8,
        multiline: []const u8,

        // debug pretty-printer: "<kind>: <raw text>"
        pub fn format(
            self: String,
            comptime _: []const u8,
            _: fmt.FormatOptions,
            writer: anytype,
        ) !void {
            const kind: []const u8 = switch (self) {
                .unquoted => "unquoted",
                .quoted => "quoted",
                .multiline => "multiline",
            };
            const raw: []const u8 = switch (self) {
                .unquoted => |str| str,
                .quoted => |str| str,
                .multiline => |str| str,
            };
            try writer.print("{s}: {s}", .{ kind, raw });
        }
    };

    keyeq: String, // key =
    value: String, // value
    table_start, // [
    table_end, // ]

    // debug pretty-printer
    pub fn format(
        self: Token,
        comptime _: []const u8,
        _: fmt.FormatOptions,
        writer: anytype,
    ) !void {
        switch (self) {
            .keyeq => |str| try writer.print("key {{{s}}} =", .{str}),
            .value => |str| try writer.print("value {{{s}}};", .{str}),
            .table_start => try writer.print("[", .{}),
            .table_end => try writer.print("]", .{}),
        }
    }
};

/// Streaming tokenizer over a ckt document. Produces Tokens on demand
/// and never allocates — every string payload is a slice of `content`.
pub const Tokenizer = struct {
    pub const Error = error{
        EmptyKey,
        InvalidChar,
        UnexpectedNewline,
        UnexpectedEof,
    };

    // the whole input document
    content: []const u8,

    // byte offset of the next unread character
    index: usize,
    // most recently produced token
    last: Token,
    // set once end-of-input has been accepted
    finished: bool,

    pub fn init(data: []const u8) Tokenizer {
        return Tokenizer{
            .content = data,

            .index = 0,
            // fix: was `undefined`, but eof() switches on `last` and can
            // run before any token is produced (empty / all-whitespace
            // input), which read an undefined union tag
            .last = Token.table_end,
            .finished = false,
        };
    }

    /// Accept end-of-input. EOF directly after a '[' means an unclosed
    /// table, which is an error; otherwise tokenizing just stops.
    fn eof(self: *Tokenizer) Error!void {
        switch (self.last) {
            .table_start => return Error.UnexpectedEof,
            else => self.finished = true,
        }
    }

    /// Advance to (not past) the next '\n'; used to skip '#' comments.
    fn skipToNewline(self: *Tokenizer) Error!void {
        while (self.content[self.index] != '\n') {
            self.index += 1;
            if (self.index >= self.content.len) {
                return self.eof();
            }
        }
    }

    // currently unused within this file
    fn skipPastNewline(self: *Tokenizer) Error!void {
        try self.skipToNewline();
        self.index += 1;
    }

    /// Skip horizontal whitespace and '#' comments; stops at '\n'.
    fn skipWhitespace(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                '#' => return try self.skipToNewline(),
                else => return,
            }
        }
    }

    /// Skip whitespace, comments, and newlines.
    fn skipWhitespaceAndNewlines(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                '#' => try self.skipToNewline(),
                '\n' => continue,
                else => return,
            }
        }
        try self.eof();
    }

    /// Skip whitespace, comments, newlines, AND the ';'/',' separators.
    fn skipWhitespaceAndNewlinesAndBreaks(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                ';', ',' => continue,
                '#' => try self.skipToNewline(),
                '\n' => continue,
                else => return,
            }
        }
        try self.eof();
    }

    /// Read an unquoted string up to a structural delimiter, trimming
    /// trailing whitespace. error.EmptyKey when only whitespace precedes
    /// the delimiter. At EOF the rest of the input is returned untrimmed.
    fn readString(self: *Tokenizer) ![]const u8 {
        const start_index = self.index;
        while (self.index < self.content.len) : (self.index += 1) {
            const char = self.content[self.index];
            switch (char) {
                '\n', '=', ';', ',', ']' => {
                    // remove trailing whitespace (eg `key = value`)
                    // fix: the old loop compared `end_index > start_index`
                    // with an inclusive end, so any single-character
                    // string fell through to error.EmptyKey
                    var end_index = self.index; // exclusive
                    while (end_index > start_index) {
                        switch (self.content[end_index - 1]) {
                            ' ', '\t', 0x0B, 0x0C, '\r' => end_index -= 1,
                            else => return self.content[start_index..end_index],
                        }
                    }
                    return error.EmptyKey;
                },
                else => continue,
            }
        }

        // EOF
        try self.eof();
        return self.content[start_index..self.index];
    }

    /// Read a quoted string, returning the slice INCLUDING both quote
    /// characters; '\\' skips the following byte so escaped quotes don't
    /// terminate the scan. Escapes are decoded later by the parser.
    fn readQuotedString(self: *Tokenizer, comptime quote: u8) Error![]const u8 {
        const start_index = self.index;

        self.index += 1; // opening quote

        while (self.index < self.content.len) : (self.index += 1) {
            const char = self.content[self.index];
            switch (char) {
                quote => {
                    self.index += 1;
                    return self.content[start_index..self.index];
                },
                '\n' => return error.UnexpectedNewline,
                '\\' => self.index += 1,
                else => continue,
            }
        }

        // EOF before closing quote
        return error.UnexpectedEof;
    }

    /// Scan a '|'-introduced multiline string, returning the raw slice
    /// (pipes and newlines included); decoding happens in the parser.
    fn readMultilineString(self: *Tokenizer) Error![]const u8 {
        const start_index = self.index;

        const State = enum { root, line };
        var state = State.root;

        while (self.index < self.content.len) : (self.index += 1) {
            switch (state) {
                .root => {
                    // remember where the previous line ended in case the
                    // next one doesn't continue the string
                    const end_index = self.index;
                    try self.skipWhitespace();
                    // fix: skipWhitespace can run to the end of input,
                    // making the index below out of bounds
                    if (self.index >= self.content.len)
                        return self.content[start_index..self.index];
                    switch (self.content[self.index]) {
                        '|' => state = State.line,
                        else => return self.content[start_index..end_index],
                    }
                },
                .line => {
                    switch (self.content[self.index]) {
                        '\n' => state = State.root,
                        else => continue,
                    }
                },
            }
        }

        // empty line or EOF
        return self.content[start_index..self.index];
    }

    /// Produce the next token, or null once the input is exhausted.
    pub fn next(self: *Tokenizer) Error!?Token {
        if (!self.finished) {
            self.last = ret: {
                // treat ; and , as whitespace
                try self.skipWhitespaceAndNewlinesAndBreaks();
                if (self.finished) return null;
                const char = self.content[self.index];
                switch (char) {
                    '[' => break :ret Token.table_start,
                    ']' => break :ret Token.table_end,
                    // '=' with no key text before it
                    '=' => return error.InvalidChar,
                    else => {
                        var value: Token.String = switch (char) {
                            '"' => Token.String{ .quoted = try self.readQuotedString('"') },
                            '\'' => Token.String{ .quoted = try self.readQuotedString('\'') },
                            '|' => Token.String{ .multiline = try self.readMultilineString() },
                            else => Token.String{ .unquoted = try self.readString() },
                        };
                        // peek at what follows to decide key vs value
                        try self.skipWhitespaceAndNewlines();
                        if (!self.finished) {
                            switch (self.content[self.index]) {
                                // separator is consumed by the bump below
                                ';', ',' => break :ret Token{ .value = value },
                                '=' => break :ret Token{ .keyeq = value },
                                ']' => {
                                    // leave the bracket for the next call
                                    self.index -= 1;
                                    break :ret Token{ .value = value };
                                },
                                else => {
                                    // re-read this char on the next call
                                    self.index -= 1;
                                    break :ret Token{ .value = value };
                                },
                            }
                        } else break :ret Token{ .value = value };
                    },
                }
            };
            // step over the character that ended this token
            self.index += 1;
            if (self.index >= self.content.len) try self.eof();
            return self.last;
        } else {
            return null;
        }
    }
};

M zckt.zig => zckt.zig +10 -543
@@ 1,558 1,25 @@
// small, bodged together ckt parser
const std = @import("std");
const debug = std.debug;
const fmt = std.fmt;
const mem = std.mem;

// table keys are heap-allocated strings owned by their table
pub const Key = []const u8;

/// A parsed value: an owned string or a nested table.
pub const Value = union(enum) {
    string: []const u8,
    table: Table,

    /// Free whatever this value owns: the string bytes, or the nested
    /// table's entire contents.
    pub fn deinit(self: *Value, a: *mem.Allocator) void {
        switch (self.*) {
            .table => |*table| table.deinit(),
            .string => |str| a.free(str),
        }
    }

    /// The string payload, or error.NotAString for a table.
    pub fn asString(self: Value) error{NotAString}![]const u8 {
        return switch (self) {
            .string => |str| str,
            .table => error.NotAString,
        };
    }

    /// The table payload, or error.NotATable for a string.
    pub fn asTable(self: Value) error{NotATable}!Table {
        return switch (self) {
            .table => |table| table,
            .string => error.NotATable,
        };
    }

    // debug printing: strings verbatim, tables as a placeholder
    pub fn format(self: Value, comptime _: []const u8, _: fmt.FormatOptions, writer: anytype) !void {
        switch (self) {
            .string => |str| try writer.print("{s}", .{str}),
            .table => try writer.print("[ ... ]", .{}),
        }
    }
};

/// Insertion-ordered, string-keyed map of parsed values.
/// Owns its keys and values; release everything with `deinit`.
pub const Table = struct {
    pub const Map = std.StringArrayHashMap(Value);

    map: Map,
    a: *mem.Allocator,

    /// Fresh, empty table; remembers `a` for later frees.
    pub fn init(a: *mem.Allocator) Table {
        return Table{ .map = Map.init(a), .a = a };
    }

    // debug printing: "[ k=v;k=v; ]"
    pub fn format(self: Table, comptime _: []const u8, _: fmt.FormatOptions, writer: anytype) !void {
        try writer.print("[ ", .{});
        var iter = self.map.iterator();
        while (iter.next()) |entry| {
            try writer.print("{s}={s};", .{ entry.key, entry.value });
        }
        try writer.print(" ]", .{});
    }

    /// Raw lookup.
    pub fn get(self: *Table, key: []const u8) ?Value {
        return self.map.get(key);
    }

    /// Lookup that only yields string values; null otherwise.
    pub fn getString(self: *Table, key: []const u8) ?[]const u8 {
        if (self.map.get(key)) |value| {
            return value.asString() catch null;
        }
        return null;
    }

    /// Lookup that only yields table values; null otherwise.
    pub fn getTable(self: *Table, key: []const u8) ?Table {
        if (self.map.get(key)) |value| {
            return value.asTable() catch null;
        }
        return null;
    }

    /// Insert `key` -> `value`, taking ownership of both. If the key was
    /// already present the map keeps its old key allocation, so the
    /// incoming `key` is freed together with the displaced value.
    pub fn put(self: *Table, key: []const u8, value: Value) !void {
        const displaced = try self.map.fetchPut(key, value);
        if (displaced) |entry| {
            var old = entry.value;
            self.a.free(key);
            old.deinit(self.a);
        }
    }

    /// Free every key and value, then the map itself.
    pub fn deinit(self: *Table) void {
        var iter = self.map.iterator();
        while (iter.next()) |entry| {
            self.a.free(entry.key);
            entry.value.deinit(self.a);
        }
        self.map.deinit();
    }
};

// a lexical token; string payloads are zero-copy slices of the input
pub const Token = union(enum) {
    // raw string text, tagged by the syntax that produced it
    pub const String = union(enum) {
        unquoted: []const u8,
        quoted: []const u8, // still includes its surrounding quotes
        multiline: []const u8, // raw '|'-prefixed lines, undecoded

        // debug pretty-printer: "<kind>: <raw text>"
        pub fn format(self: String, comptime _: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
            switch (self) {
                .unquoted => |str| try writer.print("unquoted: {s}", .{str}),
                .quoted => |str| try writer.print("quoted: {s}", .{str}),
                .multiline => |str| try writer.print("multiline: {s}", .{str}),
            }
        }
    };

    keyeq: String, // key =
    value: String, // value
    table_start, // table start
    table_end,

    // just for pretty printing when debugging lol
    pub fn format(self: Token, comptime _: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        switch (self) {
            .keyeq => |str| try writer.print("key {{{s}}} =", .{str}),
            .value => |str| try writer.print("value {{{s}}};", .{str}),
            .table_start => |str| try writer.print("[", .{}),
            .table_end => |str| try writer.print("]", .{}),
        }
    }
};

/// Streaming tokenizer over a ckt document. Produces Tokens on demand
/// and never allocates — every string payload is a slice of `content`.
pub const Tokenizer = struct {
    const Error = error{
        EmptyKey,
        InvalidChar,
        UnexpectedNewline,
        UnexpectedEof,
    };

    // the whole input document
    content: []const u8,

    // byte offset of the next unread character
    index: usize,
    // most recently produced token
    last: Token,
    // set once end-of-input has been accepted
    finished: bool,

    pub fn init(data: []const u8) Tokenizer {
        return Tokenizer{
            .content = data,

            .index = 0,
            // fix: was `undefined`, but eof() switches on `last` and can
            // run before any token is produced (empty / all-whitespace
            // input), which read an undefined union tag
            .last = Token.table_end,
            .finished = false,
        };
    }

    /// Accept end-of-input. EOF directly after a '[' means an unclosed
    /// table, which is an error; otherwise tokenizing just stops.
    fn eof(self: *Tokenizer) Error!void {
        switch (self.last) {
            .table_start => return Error.UnexpectedEof,
            else => self.finished = true,
        }
    }

    /// Advance to (not past) the next '\n'; used to skip '#' comments.
    fn skipToNewline(self: *Tokenizer) Error!void {
        while (self.content[self.index] != '\n') {
            self.index += 1;
            if (self.index >= self.content.len) {
                return self.eof();
            }
        }
    }

    // currently unused within this file
    fn skipPastNewline(self: *Tokenizer) Error!void {
        try self.skipToNewline();
        self.index += 1;
    }

    /// Skip horizontal whitespace and '#' comments; stops at '\n'.
    fn skipWhitespace(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                '#' => return try self.skipToNewline(),
                else => return,
            }
        }
    }

    /// Skip whitespace, comments, and newlines.
    fn skipWhitespaceAndNewlines(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                '#' => try self.skipToNewline(),
                '\n' => continue,
                else => return,
            }
        }
        try self.eof();
    }

    /// Skip whitespace, comments, newlines, AND the ';'/',' separators.
    fn skipWhitespaceAndNewlinesAndBreaks(self: *Tokenizer) Error!void {
        while (self.index < self.content.len) : (self.index += 1) {
            switch (self.content[self.index]) {
                // actual whitespace
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                ';', ',' => continue,
                '#' => try self.skipToNewline(),
                '\n' => continue,
                else => return,
            }
        }
        try self.eof();
    }

    /// Read an unquoted string up to a structural delimiter, trimming
    /// trailing whitespace. error.EmptyKey when only whitespace precedes
    /// the delimiter. At EOF the rest of the input is returned untrimmed.
    fn readString(self: *Tokenizer) ![]const u8 {
        const start_index = self.index;
        while (self.index < self.content.len) : (self.index += 1) {
            const char = self.content[self.index];
            switch (char) {
                '\n', '=', ';', ',', ']' => {
                    // remove trailing whitespace (eg `key = value`)
                    // fix: the old loop compared `end_index > start_index`
                    // with an inclusive end, so any single-character
                    // string fell through to error.EmptyKey
                    var end_index = self.index; // exclusive
                    while (end_index > start_index) {
                        switch (self.content[end_index - 1]) {
                            ' ', '\t', 0x0B, 0x0C, '\r' => end_index -= 1,
                            else => return self.content[start_index..end_index],
                        }
                    }
                    return error.EmptyKey;
                },
                else => continue,
            }
        }

        // EOF
        try self.eof();
        return self.content[start_index..self.index];
    }

    /// Read a quoted string, returning the slice INCLUDING both quote
    /// characters; '\\' skips the following byte so escaped quotes don't
    /// terminate the scan. Escapes are decoded later by the parser.
    fn readQuotedString(self: *Tokenizer, comptime quote: u8) Error![]const u8 {
        const start_index = self.index;

        self.index += 1; // opening quote

        while (self.index < self.content.len) : (self.index += 1) {
            const char = self.content[self.index];
            switch (char) {
                quote => {
                    self.index += 1;
                    return self.content[start_index..self.index];
                },
                '\n' => return error.UnexpectedNewline,
                '\\' => self.index += 1,
                else => continue,
            }
        }

        // EOF before closing quote
        return error.UnexpectedEof;
    }

    /// Scan a '|'-introduced multiline string, returning the raw slice
    /// (pipes and newlines included); decoding happens in the parser.
    fn readMultilineString(self: *Tokenizer) Error![]const u8 {
        const start_index = self.index;

        const State = enum { root, line };
        var state = State.root;

        while (self.index < self.content.len) : (self.index += 1) {
            switch (state) {
                .root => {
                    // remember where the previous line ended in case the
                    // next one doesn't continue the string
                    const end_index = self.index;
                    try self.skipWhitespace();
                    // fix: skipWhitespace can run to the end of input,
                    // making the index below out of bounds
                    if (self.index >= self.content.len)
                        return self.content[start_index..self.index];
                    switch (self.content[self.index]) {
                        '|' => state = State.line,
                        else => return self.content[start_index..end_index],
                    }
                },
                .line => {
                    switch (self.content[self.index]) {
                        '\n' => state = State.root,
                        else => continue,
                    }
                },
            }
        }

        // empty line or EOF
        return self.content[start_index..self.index];
    }

    /// Produce the next token, or null once the input is exhausted.
    pub fn next(self: *Tokenizer) Error!?Token {
        if (!self.finished) {
            self.last = ret: {
                // treat ; and , as whitespace
                try self.skipWhitespaceAndNewlinesAndBreaks();
                if (self.finished) return null;
                const char = self.content[self.index];
                switch (char) {
                    '[' => break :ret Token.table_start,
                    ']' => break :ret Token.table_end,
                    // '=' with no key text before it
                    '=' => return error.InvalidChar,
                    else => {
                        var value: Token.String = switch (char) {
                            '"' => Token.String{ .quoted = try self.readQuotedString('"') },
                            '\'' => Token.String{ .quoted = try self.readQuotedString('\'') },
                            '|' => Token.String{ .multiline = try self.readMultilineString() },
                            else => Token.String{ .unquoted = try self.readString() },
                        };
                        // peek at what follows to decide key vs value
                        try self.skipWhitespaceAndNewlines();
                        if (!self.finished) {
                            switch (self.content[self.index]) {
                                // separator is consumed by the bump below
                                ';', ',' => break :ret Token{ .value = value },
                                '=' => break :ret Token{ .keyeq = value },
                                ']' => {
                                    // leave the bracket for the next call
                                    self.index -= 1;
                                    break :ret Token{ .value = value };
                                },
                                else => {
                                    // re-read this char on the next call
                                    self.index -= 1;
                                    break :ret Token{ .value = value };
                                },
                            }
                        } else break :ret Token{ .value = value };
                    },
                }
            };
            // step over the character that ended this token
            self.index += 1;
            if (self.index >= self.content.len) try self.eof();
            return self.last;
        } else {
            return null;
        }
    }
};

// just a namespace to put parsing-related functions in,
// rather than having them scattered about in the top-level.
pub const Parser = struct {
    const MemError = error{
        OutOfMemory,
    };

    const StringError = error{
        UnexpectedNewline,
        InvalidEscapeChar,
        InvalidHexEscape,
        InvalidUnicodeEscape,
    };

    /// Advance `i` past horizontal whitespace (space, tab, VT, FF, CR).
    /// Leaves `i` at the first non-whitespace byte, or at `str.len`.
    fn skipWhitespace(str: []const u8, i: *usize) void {
        while (i.* < str.len) : (i.* += 1) {
            switch (str[i.*]) {
                ' ', '\t', 0x0B, 0x0C, '\r' => continue,
                else => return,
            }
        }
    }

    /// Decode a multiline string token (lines prefixed with `|`) into a
    /// newly-allocated buffer. Caller owns the returned slice.
    /// A `\` immediately before a newline continues the line (no '\n' kept).
    // i'm not sure if there's a way to do this without allocations
    fn parseMultilineString(str: []const u8, a: *mem.Allocator) MemError![]const u8 {
        const State = enum { root, line };

        var state = State.line;
        var buf = std.ArrayList(u8).init(a);
        defer buf.deinit();

        var i: usize = 0;
        debug.assert(str[i] == '|');
        i += 1; // skip initial pipe

        while (i < str.len) : (i += 1) {
            switch (state) {
                .root => {
                    skipWhitespace(str, &i);
                    // skipWhitespace may consume the rest of the token; without
                    // this check, str[i] below would index out of bounds.
                    if (i >= str.len) break;
                    switch (str[i]) {
                        '|' => state = State.line,
                        else => return buf.toOwnedSlice(),
                    }
                },
                .line => {
                    const char = str[i];
                    switch (char) {
                        '\\' => {
                            const next_is_newline_or_eof = if (i + 1 < str.len) str[i + 1] == '\n' else true;
                            if (next_is_newline_or_eof) {
                                // `\\` at end of line: the first backslash was
                                // already appended; don't treat as continuation
                                if (str[i - 1] == '\\') continue;
                                i += 1; // skip over newline
                                state = State.root;
                            } else {
                                try buf.append('\\');
                            }
                        },
                        '\n' => {
                            try buf.append('\n');
                            state = State.root;
                        },
                        else => try buf.append(char),
                    }
                },
            }
        }

        // this shouldn't happen; the tokenizer should've caught and errored on this
        return buf.toOwnedSlice();
    }

    /// Encode `cp` as UTF-8 and append it to `buf`.
    /// Rejects surrogates and out-of-range codepoints.
    fn appendCodepoint(buf: *std.ArrayList(u8), cp: u21) (MemError || StringError)!void {
        var utf8: [4]u8 = undefined;
        const len = std.unicode.utf8Encode(cp, utf8[0..]) catch return error.InvalidUnicodeEscape;
        try buf.appendSlice(utf8[0..len]);
    }

    /// Decode a quoted string token (still wrapped in its quotes), resolving
    /// backslash escapes. Caller owns the returned slice.
    // ~~stolen~~ inspired by std/zig/string_literal.zig
    fn parseQuotedString(str: []const u8, a: *mem.Allocator, comptime quote: u8) (MemError || StringError)![]const u8 {
        const State = enum { root, backslash };

        var state = State.root;
        var buf = std.ArrayList(u8).init(a);
        defer buf.deinit();

        var i: usize = 1; // skip the opening quote

        while (i < str.len) : (i += 1) {
            const char = str[i];
            switch (state) {
                .root => switch (char) {
                    '\n' => return error.UnexpectedNewline,
                    '\\' => state = State.backslash,
                    quote => {
                        i += 1;
                        return buf.toOwnedSlice();
                    },
                    else => {
                        try buf.append(char);
                    },
                },
                .backslash => switch (char) {
                    'f' => {
                        try buf.append(0x0C);
                        state = State.root;
                    },
                    'n' => {
                        try buf.append('\n');
                        state = State.root;
                    },
                    'r' => {
                        try buf.append('\r');
                        state = State.root;
                    },
                    't' => {
                        try buf.append('\t');
                        state = State.root;
                    },
                    'v' => {
                        try buf.append(0x0B);
                        state = State.root;
                    },
                    '\\', '"', '\'' => {
                        try buf.append(char);
                        state = State.root;
                    },
                    'x' => {
                        // \xNN: two hex digits appended as one raw byte
                        if (str.len < i + 3) return error.InvalidHexEscape;
                        const byte = fmt.parseUnsigned(u8, str[i + 1 .. i + 3], 16) catch return error.InvalidHexEscape;
                        try buf.append(byte);
                        state = State.root;
                        i += 2;
                    },
                    'u' => {
                        // \uNNNN: four hex digits, UTF-8 encoded.
                        // (previously appended the raw native-endian bytes of
                        // the u16, which is not a valid text encoding)
                        if (str.len < i + 5) return error.InvalidUnicodeEscape;
                        const cp = fmt.parseUnsigned(u16, str[i + 1 .. i + 5], 16) catch return error.InvalidUnicodeEscape;
                        try appendCodepoint(&buf, cp);
                        state = State.root;
                        i += 4;
                    },
                    'U' => {
                        // \UNNNNNNNN: eight hex digits, UTF-8 encoded; values
                        // above U+10FFFF overflow u21 / fail utf8Encode and
                        // are reported as InvalidUnicodeEscape
                        if (str.len < i + 9) return error.InvalidUnicodeEscape;
                        const cp = fmt.parseUnsigned(u21, str[i + 1 .. i + 9], 16) catch return error.InvalidUnicodeEscape;
                        try appendCodepoint(&buf, cp);
                        state = State.root;
                        i += 8;
                    },
                    else => return error.InvalidEscapeChar,
                },
            }
        }

        // unclosed string
        // the tokenizer should've caught this
        return buf.toOwnedSlice();
    }

    /// Decode any string token variant into an owned byte slice.
    fn parseString(tok: Token.String, a: *mem.Allocator) (MemError || StringError)![]const u8 {
        switch (tok) {
            .unquoted => |str| return a.dupe(u8, str),
            .quoted => |str| return switch (str[0]) {
                '"' => parseQuotedString(str, a, '"'),
                '\'' => parseQuotedString(str, a, '\''),
                else => @panic("unknown quote?"),
            },
            .multiline => |str| return parseMultilineString(str, a),
        }
    }

    /// Consume tokens until a `table_end` (or end of input) and build a Table.
    /// Values without an explicit `key =` get sequential numeric keys "0", "1", ...
    /// Caller owns the returned table.
    pub fn parseTable(tokens: *Tokenizer, a: *mem.Allocator) (MemError || StringError || Tokenizer.Error)!Table {
        var table = Table.init(a);
        errdefer table.deinit(); // don't leak the partial table on error
        var key: ?Key = null;
        var value: ?Value = null;
        var i: usize = 0;

        while (try tokens.next()) |token| {
            switch (token) {
                .table_end => return table,
                .keyeq => |str| key = try parseString(str, a),
                .table_start => value = .{ .table = try parseTable(tokens, a) },
                .value => |str| value = .{ .string = try parseString(str, a) },
            }

            if (value) |val| {
                // no explicit key: allocate the next auto-increment index
                const keystr = key orelse key: {
                    defer i += 1;
                    break :key try fmt.allocPrint(a, "{d}", .{i});
                };
                // NOTE(review): put() overwrites on duplicate keys; whether the
                // old value is freed depends on Table.put — verify.
                try table.put(keystr, val);
                value = null;
                key = null;
            }
        }

        // if no table end, return the table anyway;
        // any unclosed tables should've been caught by the tokenizer
        return table;
    }

    /// Parse a complete ckt document into a Table.
    /// aka 'put that shit in a hashmap please'
    /// Caller owns the table and must deinit it.
    pub fn parse(data: []const u8, a: *mem.Allocator) !Table {
        var tokenizer = Tokenizer.init(data);
        return parseTable(&tokenizer, a);
    }
};
pub const tokenizer = @import("tokenizer.zig");
pub const parser = @import("parser.zig");

const testing = std.testing;
const alloc = testing.allocator;
const table_parser = parser.table_parser;

// TODO: more tests
// Parses a bracketed table of unkeyed values and checks that the second
// value lands under the auto-generated index key "1".
// (Removed a stale duplicate `var table` declaration and a duplicated
// assertion left over from a merge/diff — the duplicate shadowed
// declaration would not compile.)
test "key = [ table ]" {
    const zckt =
        \\table = [ this is a table :), this is the second value ]
    ;

    var table = try table_parser.parse(zckt, alloc);
    defer table.deinit();

    testing.expectEqualStrings(
        "this is the second value",
        table.getTable("table").?.getString("1").?,
    );
}

// A later assignment to the same key replaces the earlier value.
// (Reconstructed: the original span contained a fused `@@` diff hunk header,
// a stale duplicate `var table` declaration, and was missing the literal's
// opening line and the closing brace — it could not compile as written.)
test "duplicate keys overwrite" {
    const zckt =
        \\key = first value
        \\key = haha! overwritten
    ;

    var table = try table_parser.parse(zckt, alloc);
    defer table.deinit();

    testing.expectEqualStrings("haha! overwritten", table.getString("key").?);
}


@@ 584,7 51,7 @@ test "multiline string" {
        \\however, i can also make part of it span one line!
    ;

    var table = try Parser.parse(zckt, alloc);
    var table = try table_parser.parse(zckt, alloc);
    defer table.deinit();

    testing.expectEqualStrings(table.getString("multiline string").?, multiline_string);