A .build.yml => .build.yml +12 -0
@@ 0,0 1,12 @@
+image: archlinux
+packages:
+ - zig
+sources:
+ - https://git.sr.ht/~andreafeletto/zig-scfg
+tasks:
+ - fmt: |
+ cd zig-scfg
+ zig fmt --check .
+ - test: |
+ cd zig-scfg
+ zig test scfg.zig
M .gitignore => .gitignore +1 -2
@@ 1,2 1,1 @@
-zig-cache/
-zig-out/
+zig-cache
M README.md => README.md +39 -59
@@ 1,17 1,19 @@
# [zig-scfg]
-A [zig] library for [scfg].
+[![builds.sr.ht status](https://builds.sr.ht/~andreafeletto/zig-scfg/commits/main.svg)](https://builds.sr.ht/~andreafeletto/zig-scfg/commits/main)
-## Usage
+A [zig] library for parsing [scfg] configuration files.
+
+## Setup
-First clone this repository as a submodule.
+Clone this repository as a submodule.
```sh
git submodule add https://git.sr.ht/~andreafeletto/zig-scfg deps/zig-scfg
```
-Than add the following to `build.zig`.
+Then add the following to your `build.zig`.
```zig
pub fn build(b: *std.build.Builder) void {
@@ 31,77 33,55 @@ The library can now be imported into your zig project.
const scfg = @import("scfg");
```
-## Documentation
+## Usage
-The function `parse` takes an allocator and a null-terminated string and
-generates a tree.
-The tree is owned by the caller, who is responsible for calling `deinit`.
-The tree contains references to the source string, so the latter should not be
-deallocated before the tree.
+I suggest using an arena allocator. The resulting tree structure can be quite
+complex, so manual deallocation can be tricky.
```zig
-const source =
- \\model A2 {
- \\ speed 250
- \\ shape {
- \\ length 50
- \\ width 100
- \\ }
- \\}
- \\model C5 {
- \\ speed 350
- \\ shape {
- \\ length 10
- \\ width 260
- \\ }
- \\}
-;
-
-var tree = try scfg.parse(allocator, source);
-defer tree.deinit(allocator);
-```
-
-The function `getAll` returns a slice (owned by the caller) of the top-level
-directives filtered by their name.
+const std = @import("std");
+const scfg = @import("scfg");
-```zig
-const models = try ast.getAll(allocator, "model");
-defer allocator.free(models);
-```
+pub fn main() !void {
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit();
+ const allocator = arena.allocator();
-The function `find` returns the first top-level directive with matching name
-and parameters.
-The parameters are compared in order and the lengths must be equal.
-If no match is found, `null` is returned.
+ const file = try std.fs.cwd().openFile("example.scfg", .{});
+ // `source` must be a null-terminated string
+ const source = try file.readToEndAllocOptions(
+ allocator, 1_000_000, null, @alignOf(u8), 0
+ );
-```zig
-const model_c5 = ast.find("model", &.{"C5"}) orelse {
- std.log.err("not found", .{});
- return;
-};
+ const root = try scfg.parse(allocator, source);
+ std.log.info("identifier of the first directive: {s}", .{root[0].name});
+}
```
-Every function can also be called on a directive.
-The function `get` returns the first top-level directive with matching name.
-If no match is found, `null` is returned.
+The result of the `parse` function is a slice of pointers to directives.
+Each directive has the following recursive structure:
```zig
-const model_c5_speed = model_c5.get("speed") orelse {
- std.log.err("not found", .{});
- return;
+const Directive = struct {
+ name: []const u8,
+ params: [][]const u8,
+ blocks: [][]*Directive,
};
```
-The parameters of a directive can be accessed through the `params` field.
-
-```zig
-const speed_value = model_c5_speed.params[0];
-```
-
-## License
+## Contributing
The code in this repository is released under the MIT license.
+You are welcome to send patches to the [mailing list] or report bugs on the
+[issue tracker].
+
+If you aren't familiar with `git send-email`, you can use the [web interface]
+or learn about it by following this excellent [tutorial].
[zig-scfg]: https://sr.ht/~andreafeletto/zig-scfg/
[zig]: https://ziglang.org/
[scfg]: https://git.sr.ht/~emersion/scfg/
+[mailing list]: https://lists.sr.ht/~andreafeletto/public-inbox
+[issue tracker]: https://todo.sr.ht/~andreafeletto/zig-scfg
+[web interface]: https://git.sr.ht/~andreafeletto/zig-scfg/send-email
+[tutorial]: https://git-send-email.io
D ast.zig => ast.zig +0 -89
@@ 1,89 0,0 @@
-const std = @import("std");
-const mem = std.mem;
-const Allocator = mem.Allocator;
-
-pub const Ast = struct {
- source: [:0]const u8,
- nodes: []const Node,
- root: Node,
-
- pub const Node = struct {
- name: []const u8,
- params: []const []const u8,
- children: []const *Node,
-
- pub fn get(self: *const Node, name: []const u8) ?*const Node {
- for (self.children) |child| {
- if (mem.eql(u8, name, child.name)) {
- return child;
- }
- }
- return null;
- }
-
- pub fn getAll(
- self: *const Node,
- allocator: Allocator,
- name: []const u8,
- ) ![]*const Node {
- var nodes = std.ArrayList(*const Node).init(allocator);
- for (self.children) |child| {
- if (mem.eql(u8, name, child.name)) {
- try nodes.append(child);
- }
- }
- return nodes.toOwnedSlice();
- }
-
- pub fn find(
- self: *const Node,
- name: []const u8,
- params: []const []const u8,
- ) ?*const Node {
- outer: for (self.children) |child| {
- if (!mem.eql(u8, name, child.name)) {
- continue;
- }
- if (child.params.len != params.len) {
- continue;
- }
- for (child.params) |param, i| {
- if (!mem.eql(u8, params[i], param)) {
- continue :outer;
- }
- }
- return child;
- }
- return null;
- }
- };
-
- pub fn deinit(self: *Ast, allocator: Allocator) void {
- for (self.nodes) |node| {
- allocator.free(node.params);
- allocator.free(node.children);
- }
- allocator.free(self.root.children);
- allocator.free(self.nodes);
- }
-
- pub fn get(self: *const Ast, name: []const u8) ?*const Node {
- return self.root.get(name);
- }
-
- pub fn getAll(
- self: *const Ast,
- allocator: Allocator,
- name: []const u8,
- ) ![]*const Node {
- return self.root.getAll(allocator, name);
- }
-
- pub fn find(
- self: *const Ast,
- name: []const u8,
- params: []const []const u8,
- ) ?*const Node {
- return self.root.find(name, params);
- }
-};
A example.scfg => example.scfg +15 -0
@@ 0,0 1,15 @@
+train "Shinkansen" {
+ model "E5" {
+ max-speed 320km/h
+ weight 453.5t
+
+ lines-served "Tōhoku" "Hokkaido"
+ }
+
+ model "E7" {
+ max-speed 275km/h
+ weight 540t
+
+ lines-served "Hokuriku" "Jōetsu"
+ }
+}
A example.zig => example.zig +19 -0
@@ 0,0 1,19 @@
+const std = @import("std");
+const scfg = @import("scfg.zig");
+
+/// Parse `example.scfg` from the current directory and log the name of its
+/// first top-level directive. All allocations live in one arena.
+pub fn main() !void {
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit();
+ const allocator = arena.allocator();
+
+ const file = try std.fs.cwd().openFile("example.scfg", .{});
+ // release the file handle on every exit path, including the errors below
+ defer file.close();
+ // `source` must be a null-terminated string
+ const source = try file.readToEndAllocOptions(allocator, 1_000_000, null, @alignOf(u8), 0);
+
+ const root = try scfg.parse(allocator, source);
+ std.log.info("identifier of the first directive: {s}", .{root[0].name});
+}
D parser.zig => parser.zig +0 -105
@@ 1,105 0,0 @@
-const std = @import("std");
-const testing = std.testing;
-const Allocator = std.mem.Allocator;
-
-const Token = @import("tokenizer.zig").Token;
-
-pub const Parser = struct {
- allocator: Allocator,
- source: [:0]const u8,
-
- state: enum { new, update },
- index: usize,
-
- nodes: std.ArrayListUnmanaged(struct {
- name: []const u8,
- params: std.ArrayListUnmanaged([]const u8),
- children: std.ArrayListUnmanaged(usize),
- }),
- roots: std.ArrayListUnmanaged(usize),
- path: std.ArrayListUnmanaged(usize),
-
- pub fn init(allocator: Allocator, source: [:0]const u8) Parser {
- return Parser{
- .allocator = allocator,
- .source = source,
- .state = .new,
- .index = 0,
- .nodes = .{},
- .roots = .{},
- .path = .{},
- };
- }
-
- pub fn deinit(self: *Parser) void {
- for (self.nodes.items) |*node| {
- node.params.deinit(self.allocator);
- node.children.deinit(self.allocator);
- }
- self.nodes.deinit(self.allocator);
- self.roots.deinit(self.allocator);
- self.path.deinit(self.allocator);
- }
-
- pub fn feed(self: *Parser, token: *const Token) !void {
- switch (self.state) {
- .new => switch (token.tag) {
- .newline => {},
- .bare_string => {
- try self.nodes.append(self.allocator, .{
- .name = self.source[token.loc.start..token.loc.end],
- .params = .{},
- .children = .{},
- });
- if (self.path.items.len != 0) {
- const parent = self.path.items[self.path.items.len - 1];
- const siblings = &self.nodes.items[parent].children;
- try siblings.append(self.allocator, self.index);
- } else {
- try self.roots.append(self.allocator, self.index);
- }
- self.state = .update;
- self.index += 1;
- },
- .r_brace => {
- _ = self.path.pop();
- },
- .eof => {
- if (self.path.items.len != 0) {
- return error.InvalidToken;
- }
- },
- else => {
- return error.InvalidToken;
- },
- },
- .update => switch (token.tag) {
- .bare_string => {
- const param = self.source[token.loc.start..token.loc.end];
-
- const node = &self.nodes.items[self.index - 1];
- try node.params.append(self.allocator, param);
- },
- // TODO: handle escape characters for dquote strigs
- .squote_string, .dquote_string => {
- const start = token.loc.start + 1;
- const end = token.loc.end - 1;
- const param = self.source[start..end];
-
- const node = &self.nodes.items[self.index - 1];
- try node.params.append(self.allocator, param);
- },
- .l_brace => {
- self.state = .new;
- try self.path.append(self.allocator, self.index - 1);
- },
- .newline, .eof => {
- self.state = .new;
- },
- else => {
- return error.InvalidToken;
- },
- },
- }
- }
-};
M scfg.zig => scfg.zig +63 -51
@@ 2,14 2,20 @@ const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;
-const Ast = @import("ast.zig").Ast;
-const Parser = @import("parser.zig").Parser;
-const Tokenizer = @import("tokenizer.zig").Tokenizer;
+const Parser = @import("src/Parser.zig");
+const Tokenizer = @import("src/Tokenizer.zig");
-pub fn parse(allocator: Allocator, source: [:0]const u8) !Ast {
+const Block = []*Directive; // a block is a slice of pointers to the directives it contains
+
+const Directive = struct { // one directive of the tree handed back (through `Block`) by `parse`
+ name: []const u8, // copied from the parser-side directive's name
+ params: [][]const u8, // owned slice taken from the parser-side params list
+ blocks: []Block, // one entry per block index the parser recorded for this directive
+};
+
+pub fn parse(allocator: Allocator, source: [:0]const u8) !Block {
var tokenizer = Tokenizer.init(source);
- var parser = Parser.init(allocator, source);
- defer parser.deinit();
+ var parser: Parser = .{ .allocator = allocator, .source = source };
while (true) {
const token = tokenizer.next();
@@ 19,66 25,72 @@ pub fn parse(allocator: Allocator, source: [:0]const u8) !Ast {
}
}
- const ast_nodes = try allocator.alloc(Ast.Node, parser.nodes.items.len);
- for (parser.nodes.items) |*node, i| {
- const children = try allocator.alloc(
- *Ast.Node,
- node.children.items.len,
- );
- for (node.children.items) |child, j| {
- children[j] = &ast_nodes[child];
+ const directives = try allocator.alloc(Directive, parser.directives.items.len);
+ const blocks = try allocator.alloc(Block, parser.blocks.items.len);
+
+ // convert blocks from arrays of indices to arrays of pointers
+ for (parser.blocks.items) |*block, i| {
+ blocks[i] = try allocator.alloc(*Directive, block.items.len);
+ for (block.items) |directive_idx, j| {
+ blocks[i][j] = &directives[directive_idx];
}
- ast_nodes[i] = .{
- .name = node.name,
- .params = node.params.toOwnedSlice(allocator),
- .children = children,
- };
}
- const roots = try allocator.alloc(*Ast.Node, parser.roots.items.len);
- for (parser.roots.items) |root, i| {
- roots[i] = &ast_nodes[root];
+ // copy directives and replace parser blocks with pointer-based blocks
+ for (parser.directives.items) |*directive, i| {
+ directives[i] = .{
+ .name = directive.name,
+ .params = directive.params.toOwnedSlice(allocator),
+ .blocks = try allocator.alloc(Block, directive.blocks.items.len),
+ };
+ for (directive.blocks.items) |block_idx, j| {
+ directives[i].blocks[j] = blocks[block_idx];
+ }
}
- return Ast{
- .source = source,
- .nodes = ast_nodes,
- .root = .{
- .name = "root",
- .params = &.{},
- .children = roots,
- },
- };
+ _ = allocator.resize(blocks, 1);
+ return blocks[0];
+}
+
+test "parse: minimal" { // one directive with two bare-string parameters and no block
+ const source = "model A2 thin";
+
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit(); // releases everything `parse` allocated through this arena
+
+ const root = try parse(arena.allocator(), source);
+ try testing.expectEqual(@as(usize, 1), root.len); // `root` is the top-level block
+ try testing.expectEqualStrings("model", root[0].name);
+ try testing.expectEqual(@as(usize, 2), root[0].params.len);
+ try testing.expectEqualStrings("A2", root[0].params[0]);
+ try testing.expectEqualStrings("thin", root[0].params[1]);
}
-test {
+test "parse: directives with a block" {
const source =
\\model A2 {
\\ speed 250
- \\ shape {
- \\ length 50
- \\ width 100
- \\ }
\\}
- \\model C5 {
- \\ speed 350
- \\ shape {
- \\ length 10
- \\ width 260
- \\ }
+ \\model A3 {
+ \\ speed 270
\\}
;
- var ast = try parse(testing.allocator, source);
- defer ast.deinit(testing.allocator);
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit();
- const models = try ast.getAll(testing.allocator, "model");
- defer testing.allocator.free(models);
- try testing.expectEqual(@as(usize, 2), models.len);
+ const root = try parse(arena.allocator(), source);
+ try testing.expectEqual(@as(usize, 2), root.len);
- const model_c5 = ast.find("model", &.{"C5"}).?;
- try testing.expectEqual(&ast.nodes[5], model_c5);
+ try testing.expectEqualStrings("model", root[0].name);
+ try testing.expectEqual(@as(usize, 1), root[0].params.len);
+ try testing.expectEqualStrings("A2", root[0].params[0]);
+ try testing.expectEqualStrings("speed", root[0].blocks[0][0].name);
+ try testing.expectEqualStrings("250", root[0].blocks[0][0].params[0]);
- const model_c5_speed = model_c5.get("speed").?.params[0];
- try testing.expectEqualStrings("350", model_c5_speed);
+ try testing.expectEqualStrings("model", root[1].name);
+ try testing.expectEqual(@as(usize, 1), root[1].params.len);
+ try testing.expectEqualStrings("A3", root[1].params[0]);
+ try testing.expectEqualStrings("speed", root[1].blocks[0][0].name);
+ try testing.expectEqualStrings("270", root[1].blocks[0][0].params[0]);
}
A src/Parser.zig => src/Parser.zig +166 -0
@@ 0,0 1,166 @@
+const std = @import("std");
+const mem = std.mem;
+const testing = std.testing;
+const ArrayListUnmanaged = std.ArrayListUnmanaged;
+
+const Tokenizer = @import("Tokenizer.zig");
+const Token = Tokenizer.Token;
+const Parser = @This();
+
+const Directive = struct { // parser-side directive; its blocks are referenced by index
+ name: []const u8, // slice of the source covering the bare string that starts the directive
+ params: ArrayListUnmanaged([]const u8), // slices of the source; quoted strings are stored without their quotes
+ // indices into the Parser.blocks list
+ blocks: ArrayListUnmanaged(usize),
+};
+
+// indices into the Parser.directives list
+const Block = ArrayListUnmanaged(usize);
+
+allocator: mem.Allocator, // used for every list append performed by `feed`
+source: [:0]const u8, // text that the token locations index into
+
+state: enum { new, update } = .new, // .new: a directive name is expected; .update: params or `{` for the current directive
+directive_idx: usize = 0, // index in `directives` of the most recently created directive
+
+directives: ArrayListUnmanaged(Directive) = .{}, // every directive, in order of appearance
+blocks: ArrayListUnmanaged(Block) = .{}, // every block; index 0 is the top-level block once the first directive is seen
+path: ArrayListUnmanaged(usize) = .{}, // stack of open block indices; the last item is the innermost block
+
+/// Advance the parser by one token.
+///
+/// In `.new` state a bare string starts a directive, which is appended to the
+/// innermost open block; in `.update` state strings become parameters of the
+/// current directive and `{` opens a child block.
+/// Returns `error.InvalidToken` for a token that is not accepted in the
+/// current state, for an unmatched `}` and for eof inside an open block;
+/// allocation errors from the list appends are propagated.
+pub fn feed(self: *Parser, token: *const Token) !void {
+ switch (self.state) {
+ .new => switch (token.tag) {
+ .newline => {},
+ .bare_string => {
+ try self.directives.append(self.allocator, .{
+ .name = self.source[token.loc.start..token.loc.end],
+ .params = .{},
+ .blocks = .{},
+ });
+ self.directive_idx = self.directives.items.len - 1;
+
+ // create top-level block on first directive
+ if (self.blocks.items.len == 0) {
+ try self.blocks.append(self.allocator, .{});
+ try self.path.append(self.allocator, 0);
+ }
+
+ // append newly created directive to current block
+ const block_idx = self.path.items[self.path.items.len - 1];
+ const block = &self.blocks.items[block_idx];
+ try block.append(self.allocator, self.directive_idx);
+
+ self.state = .update;
+ },
+ .r_brace => {
+ // `path` is empty before the first directive and holds only the
+ // top-level block afterwards: in both cases `}` is unmatched
+ if (self.path.items.len <= 1) return error.InvalidToken;
+ _ = self.path.pop();
+ self.state = .update;
+ },
+ .eof => {
+ if (self.path.items.len > 1) return error.InvalidToken;
+ },
+ else => return error.InvalidToken,
+ },
+ .update => switch (token.tag) {
+ .bare_string => {
+ const directive = &self.directives.items[self.directive_idx];
+ const param = self.source[token.loc.start..token.loc.end];
+ try directive.params.append(self.allocator, param);
+ },
+ // TODO: handle escape characters for dquote strings
+ .squote_string, .dquote_string => {
+ const directive = &self.directives.items[self.directive_idx];
+ const start = token.loc.start + 1; // skip the opening quote
+ const end = token.loc.end - 1; // stop before the closing quote
+ const param = self.source[start..end];
+ try directive.params.append(self.allocator, param);
+ },
+ .l_brace => {
+ try self.blocks.append(self.allocator, .{});
+ const block_idx = self.blocks.items.len - 1;
+
+ const directive = &self.directives.items[self.directive_idx];
+ try directive.blocks.append(self.allocator, block_idx);
+ try self.path.append(self.allocator, block_idx);
+ self.state = .new;
+ },
+ .newline, .eof => {
+ if (token.tag == .eof and self.path.items.len > 1) return error.InvalidToken;
+ self.state = .new;
+ },
+ else => return error.InvalidToken,
+ },
+ }
+}
+
+test "parser: minimal" { // one directive with a bare and a single-quoted parameter
+ const source = "model A2 'A3'";
+
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit(); // Parser declares no deinit in this file; the arena releases its lists
+
+ var tokenizer = Tokenizer.init(source);
+ var parser: Parser = .{ .allocator = arena.allocator(), .source = source };
+
+ while (true) { // feed every token, the final eof included
+ const token = tokenizer.next();
+ try parser.feed(&token);
+ if (token.tag == .eof) break;
+ }
+
+ try testing.expectEqual(@as(usize, 1), parser.directives.items.len);
+ try testing.expectEqualStrings("model", parser.directives.items[0].name);
+
+ try testing.expectEqual(@as(usize, 2), parser.directives.items[0].params.items.len);
+ try testing.expectEqualStrings("A2", parser.directives.items[0].params.items[0]);
+ try testing.expectEqualStrings("A3", parser.directives.items[0].params.items[1]); // stored without the surrounding quotes
+
+ try testing.expectEqual(@as(usize, 1), parser.blocks.items.len); // only the top-level block exists
+ try testing.expectEqualSlices(usize, &.{0}, parser.blocks.items[0].items); // and it lists directive 0
+}
+
+test "parser: directive with a block" { // a directive whose `{ }` block holds one child directive
+ const source =
+ \\model A2 {
+ \\ speed 250 kmph
+ \\}
+ ;
+
+ var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena.deinit(); // Parser declares no deinit in this file; the arena releases its lists
+
+ var tokenizer = Tokenizer.init(source);
+ var parser: Parser = .{ .allocator = arena.allocator(), .source = source };
+
+ while (true) { // feed every token, the final eof included
+ const token = tokenizer.next();
+ try parser.feed(&token);
+ if (token.tag == .eof) break;
+ }
+
+ try testing.expectEqual(@as(usize, 2), parser.blocks.items.len); // top-level block plus the braces block
+ try testing.expectEqual(@as(usize, 2), parser.directives.items.len); // `model` and `speed`
+
+ try testing.expectEqualStrings("model", parser.directives.items[0].name);
+ try testing.expectEqual(@as(usize, 1), parser.directives.items[0].params.items.len);
+ try testing.expectEqualStrings("A2", parser.directives.items[0].params.items[0]);
+
+ try testing.expectEqual(@as(usize, 1), parser.directives.items[0].blocks.items.len);
+ try testing.expectEqual(@as(usize, 1), parser.directives.items[0].blocks.items[0]); // index of the braces block in `parser.blocks`
+
+ try testing.expectEqualStrings("speed", parser.directives.items[1].name);
+ try testing.expectEqual(@as(usize, 2), parser.directives.items[1].params.items.len);
+ try testing.expectEqualStrings("250", parser.directives.items[1].params.items[0]);
+ try testing.expectEqualStrings("kmph", parser.directives.items[1].params.items[1]);
+}
R tokenizer.zig => src/Tokenizer.zig +145 -119
@@ 1,6 1,8 @@
const std = @import("std");
const testing = std.testing;
+const Tokenizer = @This();
+
pub const Token = struct {
tag: Tag,
loc: Loc,
@@ 22,129 24,148 @@ pub const Token = struct {
};
};
-pub const Tokenizer = struct {
- source: [:0]const u8,
- index: usize,
+const State = enum {
+ start,
+ bare_string,
+ squote_string,
+ dquote_string,
+ newline,
+};
- pub fn init(source: [:0]const u8) Tokenizer {
- return Tokenizer{ .source = source, .index = 0 };
- }
+source: [:0]const u8,
+index: usize,
- const State = enum {
- start,
- bare_string,
- squote_string,
- dquote_string,
- newline,
+pub fn init(source: [:0]const u8) Tokenizer {
+ return Tokenizer{ .source = source, .index = 0 };
+}
+
+pub fn next(self: *Tokenizer) Token {
+ var state: State = .start;
+ var token: Token = .{
+ .tag = .eof,
+ .loc = .{ .start = self.index, .end = undefined },
};
- pub fn next(self: *Tokenizer) Token {
- var state: State = .start;
- var token: Token = .{
- .tag = .eof,
- .loc = .{ .start = self.index, .end = undefined },
- };
-
- while (true) : (self.index += 1) {
- const char = self.source[self.index];
-
- switch (state) {
- .start => switch (char) {
- 0 => {
- break;
- },
- ' ', '\t', '\r' => {
- token.loc.start = self.index + 1;
- },
- '\n' => {
- state = .newline;
- token.tag = .newline;
- },
- '\'' => {
- state = .squote_string;
- token.tag = .squote_string;
- },
- '"' => {
- state = .dquote_string;
- token.tag = .dquote_string;
- },
- 'a'...'z', 'A'...'Z', '0'...'9', '_' => {
- state = .bare_string;
- token.tag = .bare_string;
- },
- '{' => {
- self.index += 1;
- token.tag = .l_brace;
- token.loc.end = self.index;
- return token;
- },
- '}' => {
- self.index += 1;
- token.tag = .r_brace;
- token.loc.end = self.index;
- return token;
- },
- else => {
- token.tag = .invalid;
- token.loc.end = self.index;
- self.index += 1;
- return token;
- },
- },
- .bare_string => switch (char) {
- 0, ' ', '\t', '\r', '\n', '{', '}' => {
- break;
- },
- '"', '\'' => {
- token.tag = .invalid;
- token.loc.end = self.index;
- self.index += 1;
- return token;
- },
- else => {},
- },
- .squote_string => switch (char) {
- '\'' => {
- self.index += 1;
- break;
- },
- 0, '\n' => {
- token.tag = .invalid;
- token.loc.end = self.index;
- return token;
- },
- else => {},
- },
- .dquote_string => switch (char) {
- '"' => {
- self.index += 1;
- break;
- },
- 0, '\n' => {
- token.tag = .invalid;
- token.loc.end = self.index;
- return token;
- },
- '\\' => {
- self.index += 1;
- },
- else => {},
- },
- .newline => switch (char) {
- '\n' => {},
- else => {
- break;
- },
- },
- }
+ while (true) : (self.index += 1) {
+ const char = self.source[self.index];
+
+ switch (state) {
+ .start => switch (char) {
+ 0 => {
+ break;
+ },
+ ' ', '\t', '\r' => {
+ token.loc.start = self.index + 1;
+ },
+ '\n' => {
+ state = .newline;
+ token.tag = .newline;
+ },
+ '\'' => {
+ state = .squote_string;
+ token.tag = .squote_string;
+ },
+ '"' => {
+ state = .dquote_string;
+ token.tag = .dquote_string;
+ },
+ 'a'...'z', 'A'...'Z', '0'...'9', '_' => {
+ state = .bare_string;
+ token.tag = .bare_string;
+ },
+ '{' => {
+ self.index += 1;
+ token.tag = .l_brace;
+ token.loc.end = self.index;
+ return token;
+ },
+ '}' => {
+ self.index += 1;
+ token.tag = .r_brace;
+ token.loc.end = self.index;
+ return token;
+ },
+ else => {
+ token.tag = .invalid;
+ token.loc.end = self.index;
+ self.index += 1;
+ return token;
+ },
+ },
+ .bare_string => switch (char) {
+ 0, ' ', '\t', '\r', '\n', '{', '}' => {
+ break;
+ },
+ '"', '\'' => {
+ token.tag = .invalid;
+ token.loc.end = self.index;
+ self.index += 1;
+ return token;
+ },
+ else => {},
+ },
+ .squote_string => switch (char) {
+ '\'' => {
+ self.index += 1;
+ break;
+ },
+ 0, '\n' => {
+ token.tag = .invalid;
+ token.loc.end = self.index;
+ return token;
+ },
+ else => {},
+ },
+ .dquote_string => switch (char) {
+ '"' => {
+ self.index += 1;
+ break;
+ },
+ 0, '\n' => {
+ token.tag = .invalid;
+ token.loc.end = self.index;
+ return token;
+ },
+ '\\' => {
+ self.index += 1;
+ },
+ else => {},
+ },
+ .newline => switch (char) {
+ '\n' => {},
+ else => {
+ break;
+ },
+ },
}
+ }
+
+ token.loc.end = self.index;
+ return token;
+}
+
+test "tokenizer: minimal" {
+ const source = "model A2";
+ const expected_tokens = [_]Token{
+ .{ .tag = .bare_string, .loc = .{ .start = 0, .end = 5 } },
+ .{ .tag = .bare_string, .loc = .{ .start = 6, .end = 8 } },
+ .{ .tag = .eof, .loc = .{ .start = 8, .end = 8 } },
+ };
- token.loc.end = self.index;
- return token;
+ var tokenizer = Tokenizer.init(source);
+ var tokens = std.ArrayList(Token).init(testing.allocator);
+ defer tokens.deinit();
+
+ while (true) {
+ const token = tokenizer.next();
+ try tokens.append(token);
+ if (token.tag == .eof) break;
}
-};
-test {
+ try testing.expectEqualSlices(Token, &expected_tokens, tokens.items);
+}
+
+test "tokenizer: full" {
const source =
\\model "E5" {
\\ max-speed 320km/h
@@ 152,8 173,6 @@ test {
\\ weight '453.5t' "\""
\\}
;
- var tokenizer = Tokenizer.init(source);
-
const expected_tokens = [_]Token{
.{ .tag = .bare_string, .loc = .{ .start = 0, .end = 5 } },
.{ .tag = .dquote_string, .loc = .{ .start = 6, .end = 10 } },
@@ 170,8 189,15 @@ test {
.{ .tag = .eof, .loc = .{ .start = 60, .end = 60 } },
};
- for (expected_tokens) |expected_token| {
+ var tokenizer = Tokenizer.init(source);
+ var tokens = std.ArrayList(Token).init(testing.allocator);
+ defer tokens.deinit();
+
+ while (true) {
const token = tokenizer.next();
- try testing.expectEqual(expected_token, token);
+ try tokens.append(token);
+ if (token.tag == .eof) break;
}
+
+ try testing.expectEqualSlices(Token, &expected_tokens, tokens.items);
}