~andreafeletto/zig-scfg

239dc6f90cea2d6453e8e472ed29c8864f53cf6f — Andrea Feletto 2 years ago 0bb9e3f
major rewrite
10 files changed, 456 insertions(+), 425 deletions(-)

A .build.yml
M .gitignore
M README.md
D ast.zig
A example.scfg
A example.zig
D parser.zig
M scfg.zig
A src/Parser.zig
R tokenizer.zig => src/Tokenizer.zig
A .build.yml => .build.yml +12 -0
@@ 0,0 1,12 @@
image: archlinux
packages:
  - zig
sources:
  - https://git.sr.ht/~andreafeletto/zig-scfg
tasks:
  - fmt: |
      cd zig-scfg
      zig fmt --check .
  - test: |
      cd zig-scfg
      zig test scfg.zig

M .gitignore => .gitignore +1 -2
@@ 1,2 1,1 @@
zig-cache/
zig-out/
zig-cache

M README.md => README.md +39 -59
@@ 1,17 1,19 @@

# [zig-scfg]

A [zig] library for [scfg].
[![builds.sr.ht status](https://builds.sr.ht/~andreafeletto/zig-scfg/commits/main.svg)](https://builds.sr.ht/~andreafeletto/zig-scfg/commits/main)

## Usage
A [zig] library for parsing [scfg] configuration files.

## Setup

First clone this repository as a submodule.
Clone this repository as a submodule.

```sh
git submodule add https://git.sr.ht/~andreafeletto/zig-scfg deps/zig-scfg
```

Then add the following to `build.zig`.
Then add the following to your `build.zig`.

```zig
pub fn build(b: *std.build.Builder) void {


@@ 31,77 33,55 @@ The library can now be imported into your zig project.
const scfg = @import("scfg");
```

## Documentation
## Usage

The function `parse` takes an allocator and a null-terminated string and
generates a tree.
The tree is owned by the caller, who is responsible for calling `deinit`.
The tree contains references to the source string, so the latter should not be
deallocated before the tree.
I suggest using an arena allocator: the resulting tree structure can be quite
complex, so manual deallocation can be tricky.

```zig
const source =
    \\model A2 {
    \\  speed 250
    \\  shape {
    \\    length 50
    \\    width 100
    \\  }
    \\}
    \\model C5 {
    \\  speed 350
    \\  shape {
    \\    length 10
    \\    width 260
    \\  }
    \\}
;

var tree = try scfg.parse(allocator, source);
defer tree.deinit(allocator);
```

The function `getAll` returns a slice (owned by the caller) of the top-level
directives filtered by their name.

```zig
const models = try ast.getAll(allocator, "model");
defer allocator.free(models);
```

The function `find` returns the first top-level directive with matching name
and parameters.
The parameters are compared in order and the lengths must be equal.
If no match is found, `null` is returned.

```zig
const model_c5 = ast.find("model", &.{"C5"}) orelse {
    std.log.err("not found", .{});
    return;
};
```

Every function can also be called on a directive.
The function `get` returns the first top-level directive with matching name.
If no match is found, `null` is returned.

```zig
const model_c5_speed = model_c5.get("speed") orelse {
    std.log.err("not found", .{});
    return;
};
```

The parameters of a directive can be accessed through the `params` field.

```zig
const speed_value = model_c5_speed.params[0];
```

```zig
const std = @import("std");
const scfg = @import("scfg");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const file = try std.fs.cwd().openFile("example.scfg", .{});
    // `source` must be a null-terminated string
    const source = try file.readToEndAllocOptions(
        allocator, 1_000_000, null, @alignOf(u8), 0
    );

    const root = try scfg.parse(allocator, source);
    std.log.info("identifier of the first directive: {s}", .{root[0].name});
}
```

The result of the `parse` function is a slice of pointers to directives.
Each directive has the following recursive structure:

```zig
const Directive = struct {
    name: []const u8,
    params: [][]const u8,
    blocks: [][]*Directive,
};
```
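
As an illustration only (not part of this commit), a recursive walk over that structure could look like the sketch below; the `dump` helper is hypothetical and accepts the slice returned by `parse` (or any nested block):

```zig
const std = @import("std");

// Hypothetical helper (not part of the commit): print every directive name,
// indented by nesting depth. `block` is a slice of directive pointers, e.g.
// the value returned by `parse` or any entry of `Directive.blocks`.
fn dump(block: anytype, depth: usize) void {
    for (block) |directive| {
        var i: usize = 0;
        while (i < depth) : (i += 1) std.debug.print("  ", .{});
        std.debug.print("{s}\n", .{directive.name});
        for (directive.blocks) |child_block| {
            dump(child_block, depth + 1);
        }
    }
}
```

For a parsed tree `root`, `dump(root, 0)` prints each directive name indented by its depth.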

## License
## Contributing

The code in this repository is released under the MIT license.
You are welcome to send patches to the [mailing list] or report bugs on the
[issue tracker].

If you aren't familiar with `git send-email`, you can use the [web interface]
or learn about it following this excellent [tutorial].

[zig-scfg]: https://sr.ht/~andreafeletto/zig-scfg/
[zig]: https://ziglang.org/
[scfg]: https://git.sr.ht/~emersion/scfg/
[mailing list]: https://lists.sr.ht/~andreafeletto/public-inbox
[issue tracker]: https://todo.sr.ht/~andreafeletto/zig-scfg
[web interface]: https://git.sr.ht/~andreafeletto/zig-scfg/send-email
[tutorial]: https://git-send-email.io

D ast.zig => ast.zig +0 -89
@@ 1,89 0,0 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;

pub const Ast = struct {
    source: [:0]const u8,
    nodes: []const Node,
    root: Node,

    pub const Node = struct {
        name: []const u8,
        params: []const []const u8,
        children: []const *Node,

        pub fn get(self: *const Node, name: []const u8) ?*const Node {
            for (self.children) |child| {
                if (mem.eql(u8, name, child.name)) {
                    return child;
                }
            }
            return null;
        }

        pub fn getAll(
            self: *const Node,
            allocator: Allocator,
            name: []const u8,
        ) ![]*const Node {
            var nodes = std.ArrayList(*const Node).init(allocator);
            for (self.children) |child| {
                if (mem.eql(u8, name, child.name)) {
                    try nodes.append(child);
                }
            }
            return nodes.toOwnedSlice();
        }

        pub fn find(
            self: *const Node,
            name: []const u8,
            params: []const []const u8,
        ) ?*const Node {
            outer: for (self.children) |child| {
                if (!mem.eql(u8, name, child.name)) {
                    continue;
                }
                if (child.params.len != params.len) {
                    continue;
                }
                for (child.params) |param, i| {
                    if (!mem.eql(u8, params[i], param)) {
                        continue :outer;
                    }
                }
                return child;
            }
            return null;
        }
    };

    pub fn deinit(self: *Ast, allocator: Allocator) void {
        for (self.nodes) |node| {
            allocator.free(node.params);
            allocator.free(node.children);
        }
        allocator.free(self.root.children);
        allocator.free(self.nodes);
    }

    pub fn get(self: *const Ast, name: []const u8) ?*const Node {
        return self.root.get(name);
    }

    pub fn getAll(
        self: *const Ast,
        allocator: Allocator,
        name: []const u8,
    ) ![]*const Node {
        return self.root.getAll(allocator, name);
    }

    pub fn find(
        self: *const Ast,
        name: []const u8,
        params: []const []const u8,
    ) ?*const Node {
        return self.root.find(name, params);
    }
};

A example.scfg => example.scfg +15 -0
@@ 0,0 1,15 @@
train "Shinkansen" {
    model "E5" {
        max-speed 320km/h
        weight 453.5t

        lines-served "Tōhoku" "Hokkaido"
    }

    model "E7" {
        max-speed 275km/h
        weight 540t

        lines-served "Hokuriku" "Jōetsu"
    }
}

A example.zig => example.zig +15 -0
@@ 0,0 1,15 @@
const std = @import("std");
const scfg = @import("scfg.zig");

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const file = try std.fs.cwd().openFile("example.scfg", .{});
    // `source` must be a null-terminated string
    const source = try file.readToEndAllocOptions(allocator, 1_000_000, null, @alignOf(u8), 0);

    const root = try scfg.parse(allocator, source);
    std.log.info("identifier of the first directive: {s}", .{root[0].name});
}
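
As a hedged illustration (not part of the diff), the nested data in `example.scfg` could then be reached through the `blocks` and `params` fields; the following sketch assumes it is appended inside `main` after the call to `parse`:

```zig
    // Illustration only: with example.scfg as input, root[0] is the
    // `train "Shinkansen"` directive and its block holds the `model` entries.
    const train = root[0];
    std.log.info("train: {s}", .{train.params[0]}); // Shinkansen

    const first_model = train.blocks[0][0];
    std.log.info("model: {s}", .{first_model.params[0]}); // E5
```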

D parser.zig => parser.zig +0 -105
@@ 1,105 0,0 @@
const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;

const Token = @import("tokenizer.zig").Token;

pub const Parser = struct {
    allocator: Allocator,
    source: [:0]const u8,

    state: enum { new, update },
    index: usize,

    nodes: std.ArrayListUnmanaged(struct {
        name: []const u8,
        params: std.ArrayListUnmanaged([]const u8),
        children: std.ArrayListUnmanaged(usize),
    }),
    roots: std.ArrayListUnmanaged(usize),
    path: std.ArrayListUnmanaged(usize),

    pub fn init(allocator: Allocator, source: [:0]const u8) Parser {
        return Parser{
            .allocator = allocator,
            .source = source,
            .state = .new,
            .index = 0,
            .nodes = .{},
            .roots = .{},
            .path = .{},
        };
    }

    pub fn deinit(self: *Parser) void {
        for (self.nodes.items) |*node| {
            node.params.deinit(self.allocator);
            node.children.deinit(self.allocator);
        }
        self.nodes.deinit(self.allocator);
        self.roots.deinit(self.allocator);
        self.path.deinit(self.allocator);
    }

    pub fn feed(self: *Parser, token: *const Token) !void {
        switch (self.state) {
            .new => switch (token.tag) {
                .newline => {},
                .bare_string => {
                    try self.nodes.append(self.allocator, .{
                        .name = self.source[token.loc.start..token.loc.end],
                        .params = .{},
                        .children = .{},
                    });
                    if (self.path.items.len != 0) {
                        const parent = self.path.items[self.path.items.len - 1];
                        const siblings = &self.nodes.items[parent].children;
                        try siblings.append(self.allocator, self.index);
                    } else {
                        try self.roots.append(self.allocator, self.index);
                    }
                    self.state = .update;
                    self.index += 1;
                },
                .r_brace => {
                    _ = self.path.pop();
                },
                .eof => {
                    if (self.path.items.len != 0) {
                        return error.InvalidToken;
                    }
                },
                else => {
                    return error.InvalidToken;
                },
            },
            .update => switch (token.tag) {
                .bare_string => {
                    const param = self.source[token.loc.start..token.loc.end];

                    const node = &self.nodes.items[self.index - 1];
                    try node.params.append(self.allocator, param);
                },
                // TODO: handle escape characters for dquote strings
                .squote_string, .dquote_string => {
                    const start = token.loc.start + 1;
                    const end = token.loc.end - 1;
                    const param = self.source[start..end];

                    const node = &self.nodes.items[self.index - 1];
                    try node.params.append(self.allocator, param);
                },
                .l_brace => {
                    self.state = .new;
                    try self.path.append(self.allocator, self.index - 1);
                },
                .newline, .eof => {
                    self.state = .new;
                },
                else => {
                    return error.InvalidToken;
                },
            },
        }
    }
};

M scfg.zig => scfg.zig +63 -51
@@ 2,14 2,20 @@ const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;

const Ast = @import("ast.zig").Ast;
const Parser = @import("parser.zig").Parser;
const Tokenizer = @import("tokenizer.zig").Tokenizer;
const Parser = @import("src/Parser.zig");
const Tokenizer = @import("src/Tokenizer.zig");

pub fn parse(allocator: Allocator, source: [:0]const u8) !Ast {
const Block = []*Directive;

const Directive = struct {
    name: []const u8,
    params: [][]const u8,
    blocks: []Block,
};

pub fn parse(allocator: Allocator, source: [:0]const u8) !Block {
    var tokenizer = Tokenizer.init(source);
    var parser = Parser.init(allocator, source);
    defer parser.deinit();
    var parser: Parser = .{ .allocator = allocator, .source = source };

    while (true) {
        const token = tokenizer.next();


@@ 19,66 25,72 @@ pub fn parse(allocator: Allocator, source: [:0]const u8) !Ast {
        }
    }

    const ast_nodes = try allocator.alloc(Ast.Node, parser.nodes.items.len);
    for (parser.nodes.items) |*node, i| {
        const children = try allocator.alloc(
            *Ast.Node,
            node.children.items.len,
        );
        for (node.children.items) |child, j| {
            children[j] = &ast_nodes[child];
    const directives = try allocator.alloc(Directive, parser.directives.items.len);
    const blocks = try allocator.alloc(Block, parser.blocks.items.len);

    // convert blocks from arrays of indices to arrays of pointers
    for (parser.blocks.items) |*block, i| {
        blocks[i] = try allocator.alloc(*Directive, block.items.len);
        for (block.items) |directive_idx, j| {
            blocks[i][j] = &directives[directive_idx];
        }
        ast_nodes[i] = .{
            .name = node.name,
            .params = node.params.toOwnedSlice(allocator),
            .children = children,
        };
    }

    const roots = try allocator.alloc(*Ast.Node, parser.roots.items.len);
    for (parser.roots.items) |root, i| {
        roots[i] = &ast_nodes[root];
    // copy directives and replace parser blocks with pointer-based blocks
    for (parser.directives.items) |*directive, i| {
        directives[i] = .{
            .name = directive.name,
            .params = directive.params.toOwnedSlice(allocator),
            .blocks = try allocator.alloc(Block, directive.blocks.items.len),
        };
        for (directive.blocks.items) |block_idx, j| {
            directives[i].blocks[j] = blocks[block_idx];
        }
    }

    return Ast{
        .source = source,
        .nodes = ast_nodes,
        .root = .{
            .name = "root",
            .params = &.{},
            .children = roots,
        },
    };
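    // the parser creates the top-level block first, so blocks[0] is the root
    // of the parsed tree; the outer `blocks` slice itself is only temporary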
    _ = allocator.resize(blocks, 1);
    return blocks[0];
}

test "parse: minimal" {
    const source = "model A2 thin";

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const root = try parse(arena.allocator(), source);
    try testing.expectEqual(@as(usize, 1), root.len);
    try testing.expectEqualStrings("model", root[0].name);
    try testing.expectEqual(@as(usize, 2), root[0].params.len);
    try testing.expectEqualStrings("A2", root[0].params[0]);
    try testing.expectEqualStrings("thin", root[0].params[1]);
}

test {
test "parse: directives with a block" {
    const source =
        \\model A2 {
        \\  speed 250
        \\  shape {
        \\    length 50
        \\    width 100
        \\  }
        \\}
        \\model C5 {
        \\  speed 350
        \\  shape {
        \\    length 10
        \\    width 260
        \\  }
        \\model A3 {
        \\  speed 270
        \\}
    ;

    var ast = try parse(testing.allocator, source);
    defer ast.deinit(testing.allocator);
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    const models = try ast.getAll(testing.allocator, "model");
    defer testing.allocator.free(models);
    try testing.expectEqual(@as(usize, 2), models.len);
    const root = try parse(arena.allocator(), source);
    try testing.expectEqual(@as(usize, 2), root.len);

    const model_c5 = ast.find("model", &.{"C5"}).?;
    try testing.expectEqual(&ast.nodes[5], model_c5);
    try testing.expectEqualStrings("model", root[0].name);
    try testing.expectEqual(@as(usize, 1), root[0].params.len);
    try testing.expectEqualStrings("A2", root[0].params[0]);
    try testing.expectEqualStrings("speed", root[0].blocks[0][0].name);
    try testing.expectEqualStrings("250", root[0].blocks[0][0].params[0]);

    const model_c5_speed = model_c5.get("speed").?.params[0];
    try testing.expectEqualStrings("350", model_c5_speed);
    try testing.expectEqualStrings("model", root[1].name);
    try testing.expectEqual(@as(usize, 1), root[1].params.len);
    try testing.expectEqualStrings("A3", root[1].params[0]);
    try testing.expectEqualStrings("speed", root[1].blocks[0][0].name);
    try testing.expectEqualStrings("270", root[1].blocks[0][0].params[0]);
}

A src/Parser.zig => src/Parser.zig +166 -0
@@ 0,0 1,166 @@
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const ArrayListUnmanaged = std.ArrayListUnmanaged;

const Tokenizer = @import("Tokenizer.zig");
const Token = Tokenizer.Token;
const Parser = @This();

const Directive = struct {
    name: []const u8,
    params: ArrayListUnmanaged([]const u8),
    // indices into the Parser.blocks list
    blocks: ArrayListUnmanaged(usize),
};

// indices into the Parser.directives list
const Block = ArrayListUnmanaged(usize);

allocator: mem.Allocator,
source: [:0]const u8,

// `.new` expects the start of a directive, a closing brace, or a newline;
// `.update` collects parameters (or an opening brace) for the directive at
// `directive_idx`, the most recently appended entry in `directives`.
state: enum { new, update } = .new,
directive_idx: usize = 0,

directives: ArrayListUnmanaged(Directive) = .{},
blocks: ArrayListUnmanaged(Block) = .{},
// stack of indices into `blocks`; the last entry is the block that new
// directives are currently appended to
path: ArrayListUnmanaged(usize) = .{},

pub fn feed(self: *Parser, token: *const Token) !void {
    switch (self.state) {
        .new => switch (token.tag) {
            .newline => {},
            .bare_string => {
                try self.directives.append(self.allocator, .{
                    .name = self.source[token.loc.start..token.loc.end],
                    .params = .{},
                    .blocks = .{},
                });
                self.directive_idx = self.directives.items.len - 1;

                // create top-level block on first directive
                if (self.blocks.items.len == 0) {
                    try self.blocks.append(self.allocator, .{});
                    try self.path.append(self.allocator, 0);
                }

                // append newly created directive to current block
                const block_idx = self.path.items[self.path.items.len - 1];
                const block = &self.blocks.items[block_idx];
                try block.append(self.allocator, self.directive_idx);

                self.state = .update;
            },
            .r_brace => {
                if (self.path.items.len == 1) {
                    return error.InvalidToken;
                }
                _ = self.path.pop();
                self.state = .update;
            },
            .eof => {
                if (self.path.items.len > 1) {
                    return error.InvalidToken;
                }
            },
            else => {
                return error.InvalidToken;
            },
        },
        .update => switch (token.tag) {
            .bare_string => {
                const directive = &self.directives.items[self.directive_idx];
                const param = self.source[token.loc.start..token.loc.end];
                try directive.params.append(self.allocator, param);
            },
            // TODO: handle escape characters for dquote strings
            .squote_string, .dquote_string => {
                const directive = &self.directives.items[self.directive_idx];
                const start = token.loc.start + 1;
                const end = token.loc.end - 1;
                const param = self.source[start..end];
                try directive.params.append(self.allocator, param);
            },
            .l_brace => {
                try self.blocks.append(self.allocator, .{});
                const block_idx = self.blocks.items.len - 1;

                const directive = &self.directives.items[self.directive_idx];
                try directive.blocks.append(self.allocator, block_idx);
                try self.path.append(self.allocator, block_idx);
                self.state = .new;
            },
            .newline, .eof => {
                if (token.tag == .eof and self.path.items.len > 1) {
                    return error.InvalidToken;
                }
                self.state = .new;
            },
            else => {
                return error.InvalidToken;
            },
        },
    }
}

test "parser: minimal" {
    const source = "model A2 'A3'";

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    var tokenizer = Tokenizer.init(source);
    var parser: Parser = .{ .allocator = arena.allocator(), .source = source };

    while (true) {
        const token = tokenizer.next();
        try parser.feed(&token);
        if (token.tag == .eof) break;
    }

    try testing.expectEqual(@as(usize, 1), parser.directives.items.len);
    try testing.expectEqualStrings("model", parser.directives.items[0].name);

    try testing.expectEqual(@as(usize, 2), parser.directives.items[0].params.items.len);
    try testing.expectEqualStrings("A2", parser.directives.items[0].params.items[0]);
    try testing.expectEqualStrings("A3", parser.directives.items[0].params.items[1]);

    try testing.expectEqual(@as(usize, 1), parser.blocks.items.len);
    try testing.expectEqualSlices(usize, &.{0}, parser.blocks.items[0].items);
}

test "parser: directive with a block" {
    const source =
        \\model A2 {
        \\  speed 250 kmph
        \\}
    ;

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();

    var tokenizer = Tokenizer.init(source);
    var parser: Parser = .{ .allocator = arena.allocator(), .source = source };

    while (true) {
        const token = tokenizer.next();
        try parser.feed(&token);
        if (token.tag == .eof) break;
    }

    try testing.expectEqual(@as(usize, 2), parser.blocks.items.len);
    try testing.expectEqual(@as(usize, 2), parser.directives.items.len);

    try testing.expectEqualStrings("model", parser.directives.items[0].name);
    try testing.expectEqual(@as(usize, 1), parser.directives.items[0].params.items.len);
    try testing.expectEqualStrings("A2", parser.directives.items[0].params.items[0]);

    try testing.expectEqual(@as(usize, 1), parser.directives.items[0].blocks.items.len);
    try testing.expectEqual(@as(usize, 1), parser.directives.items[0].blocks.items[0]);

    try testing.expectEqualStrings("speed", parser.directives.items[1].name);
    try testing.expectEqual(@as(usize, 2), parser.directives.items[1].params.items.len);
    try testing.expectEqualStrings("250", parser.directives.items[1].params.items[0]);
    try testing.expectEqualStrings("kmph", parser.directives.items[1].params.items[1]);
}

R tokenizer.zig => src/Tokenizer.zig +145 -119
@@ 1,6 1,8 @@
const std = @import("std");
const testing = std.testing;

const Tokenizer = @This();

pub const Token = struct {
    tag: Tag,
    loc: Loc,


@@ 22,129 24,148 @@ pub const Token = struct {
    };
};

pub const Tokenizer = struct {
    source: [:0]const u8,
    index: usize,
const State = enum {
    start,
    bare_string,
    squote_string,
    dquote_string,
    newline,
};

    pub fn init(source: [:0]const u8) Tokenizer {
        return Tokenizer{ .source = source, .index = 0 };
    }
source: [:0]const u8,
index: usize,

    const State = enum {
        start,
        bare_string,
        squote_string,
        dquote_string,
        newline,
pub fn init(source: [:0]const u8) Tokenizer {
    return Tokenizer{ .source = source, .index = 0 };
}

// Returns the next token; `token.loc` holds byte offsets into `source`, and a
// run of consecutive newlines is collapsed into a single `.newline` token.
pub fn next(self: *Tokenizer) Token {
    var state: State = .start;
    var token: Token = .{
        .tag = .eof,
        .loc = .{ .start = self.index, .end = undefined },
    };

    pub fn next(self: *Tokenizer) Token {
        var state: State = .start;
        var token: Token = .{
            .tag = .eof,
            .loc = .{ .start = self.index, .end = undefined },
        };

        while (true) : (self.index += 1) {
            const char = self.source[self.index];

            switch (state) {
                .start => switch (char) {
                    0 => {
                        break;
                    },
                    ' ', '\t', '\r' => {
                        token.loc.start = self.index + 1;
                    },
                    '\n' => {
                        state = .newline;
                        token.tag = .newline;
                    },
                    '\'' => {
                        state = .squote_string;
                        token.tag = .squote_string;
                    },
                    '"' => {
                        state = .dquote_string;
                        token.tag = .dquote_string;
                    },
                    'a'...'z', 'A'...'Z', '0'...'9', '_' => {
                        state = .bare_string;
                        token.tag = .bare_string;
                    },
                    '{' => {
                        self.index += 1;
                        token.tag = .l_brace;
                        token.loc.end = self.index;
                        return token;
                    },
                    '}' => {
                        self.index += 1;
                        token.tag = .r_brace;
                        token.loc.end = self.index;
                        return token;
                    },
                    else => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        self.index += 1;
                        return token;
                    },
                },
                .bare_string => switch (char) {
                    0, ' ', '\t', '\r', '\n', '{', '}' => {
                        break;
                    },
                    '"', '\'' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        self.index += 1;
                        return token;
                    },
                    else => {},
                },
                .squote_string => switch (char) {
                    '\'' => {
                        self.index += 1;
                        break;
                    },
                    0, '\n' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        return token;
                    },
                    else => {},
                },
                .dquote_string => switch (char) {
                    '"' => {
                        self.index += 1;
                        break;
                    },
                    0, '\n' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        return token;
                    },
                    '\\' => {
                        self.index += 1;
                    },
                    else => {},
                },
                .newline => switch (char) {
                    '\n' => {},
                    else => {
                        break;
                    },
                },
            }
    while (true) : (self.index += 1) {
        const char = self.source[self.index];

        switch (state) {
            .start => switch (char) {
                0 => {
                    break;
                },
                ' ', '\t', '\r' => {
                    token.loc.start = self.index + 1;
                },
                '\n' => {
                    state = .newline;
                    token.tag = .newline;
                },
                '\'' => {
                    state = .squote_string;
                    token.tag = .squote_string;
                },
                '"' => {
                    state = .dquote_string;
                    token.tag = .dquote_string;
                },
                'a'...'z', 'A'...'Z', '0'...'9', '_' => {
                    state = .bare_string;
                    token.tag = .bare_string;
                },
                '{' => {
                    self.index += 1;
                    token.tag = .l_brace;
                    token.loc.end = self.index;
                    return token;
                },
                '}' => {
                    self.index += 1;
                    token.tag = .r_brace;
                    token.loc.end = self.index;
                    return token;
                },
                else => {
                    token.tag = .invalid;
                    token.loc.end = self.index;
                    self.index += 1;
                    return token;
                },
            },
            .bare_string => switch (char) {
                0, ' ', '\t', '\r', '\n', '{', '}' => {
                    break;
                },
                '"', '\'' => {
                    token.tag = .invalid;
                    token.loc.end = self.index;
                    self.index += 1;
                    return token;
                },
                else => {},
            },
            .squote_string => switch (char) {
                '\'' => {
                    self.index += 1;
                    break;
                },
                0, '\n' => {
                    token.tag = .invalid;
                    token.loc.end = self.index;
                    return token;
                },
                else => {},
            },
            .dquote_string => switch (char) {
                '"' => {
                    self.index += 1;
                    break;
                },
                0, '\n' => {
                    token.tag = .invalid;
                    token.loc.end = self.index;
                    return token;
                },
                '\\' => {
                    self.index += 1;
                },
                else => {},
            },
            .newline => switch (char) {
                '\n' => {},
                else => {
                    break;
                },
            },
        }
    }

    token.loc.end = self.index;
    return token;
}

test "tokenizer: minimal" {
    const source = "model A2";
    const expected_tokens = [_]Token{
        .{ .tag = .bare_string, .loc = .{ .start = 0, .end = 5 } },
        .{ .tag = .bare_string, .loc = .{ .start = 6, .end = 8 } },
        .{ .tag = .eof, .loc = .{ .start = 8, .end = 8 } },
    };

        token.loc.end = self.index;
        return token;
    var tokenizer = Tokenizer.init(source);
    var tokens = std.ArrayList(Token).init(testing.allocator);
    defer tokens.deinit();

    while (true) {
        const token = tokenizer.next();
        try tokens.append(token);
        if (token.tag == .eof) break;
    }
};

test {
    try testing.expectEqualSlices(Token, &expected_tokens, tokens.items);
}

test "tokenizer: full" {
    const source =
        \\model "E5" {
        \\   max-speed 320km/h


@@ 152,8 173,6 @@ test {
        \\   weight '453.5t' "\""
        \\}
    ;
    var tokenizer = Tokenizer.init(source);

    const expected_tokens = [_]Token{
        .{ .tag = .bare_string, .loc = .{ .start = 0, .end = 5 } },
        .{ .tag = .dquote_string, .loc = .{ .start = 6, .end = 10 } },


@@ 170,8 189,15 @@ test {
        .{ .tag = .eof, .loc = .{ .start = 60, .end = 60 } },
    };

    for (expected_tokens) |expected_token| {
    var tokenizer = Tokenizer.init(source);
    var tokens = std.ArrayList(Token).init(testing.allocator);
    defer tokens.deinit();

    while (true) {
        const token = tokenizer.next();
        try testing.expectEqual(expected_token, token);
        try tokens.append(token);
        if (token.tag == .eof) break;
    }

    try testing.expectEqualSlices(Token, &expected_tokens, tokens.items);
}