~andreafeletto/zig-scfg

0bb9e3f43a8e567d82fe3436941b2adf9073ebc2 — Andrea Feletto 2 years ago
first commit
7 files changed, 585 insertions(+), 0 deletions(-)

A .gitignore
A LICENSE
A README.md
A ast.zig
A parser.zig
A scfg.zig
A tokenizer.zig
A  => .gitignore +2 -0
@@ 1,2 @@
zig-cache/
zig-out/

A  => LICENSE +21 -0
@@ 1,21 @@
MIT License

Copyright (c) 2022 Andrea Feletto

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

A  => README.md +107 -0
@@ 1,107 @@

# [zig-scfg]

A [zig] library for [scfg].

## Usage

First clone this repository as a submodule.

```sh
git submodule add https://git.sr.ht/~andreafeletto/zig-scfg deps/zig-scfg
```

Then add the following to `build.zig`.

```zig
const std = @import("std");

pub fn build(b: *std.build.Builder) void {
    // ...
    const scfg: std.build.Pkg = .{
        .name = "scfg",
        .path = .{ .path = "deps/zig-scfg/scfg.zig" },
    };
    exe.addPackage(scfg);
    // ...
}
```

The library can now be imported into your Zig project.

```zig
const scfg = @import("scfg");
```

## Documentation

The function `parse` takes an allocator and a null-terminated string and
returns a tree of directives.
The tree is owned by the caller, who is responsible for calling `deinit`.
The tree holds slices into the source string, so the source must not be
deallocated before the tree.

```zig
const source =
    \\model A2 {
    \\  speed 250
    \\  shape {
    \\    length 50
    \\    width 100
    \\  }
    \\}
    \\model C5 {
    \\  speed 350
    \\  shape {
    \\    length 10
    \\    width 260
    \\  }
    \\}
;

var tree = try scfg.parse(allocator, source);
defer tree.deinit(allocator);
```

The function `getAll` returns a caller-owned slice of all the top-level
directives with the given name.

```zig
const models = try tree.getAll(allocator, "model");
defer allocator.free(models);
```

The function `find` returns the first top-level directive whose name and
parameters match.
Parameters are compared in order, and the number of parameters must match as
well.
If no match is found, `null` is returned.

```zig
const model_c5 = tree.find("model", &.{"C5"}) orelse {
    std.log.err("not found", .{});
    return;
};
```

Every function above can also be called on a directive, in which case it
searches that directive's children.
The function `get` returns the first directive with a matching name.
If no match is found, `null` is returned.

```zig
const model_c5_speed = model_c5.get("speed") orelse {
    std.log.err("not found", .{});
    return;
};
```

The parameters of a directive can be accessed through the `params` field.

```zig
const speed_value = model_c5_speed.params[0];
```
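
The snippets above can be combined into a short end-to-end example.
The sketch below is illustrative only; it assumes `std` is imported and that
`allocator` and `source` are defined as in the previous examples.

```zig
var tree = try scfg.parse(allocator, source);
defer tree.deinit(allocator);

// Collect every top-level `model` directive and print its name and speed.
const models = try tree.getAll(allocator, "model");
defer allocator.free(models);

for (models) |model| {
    const speed = model.get("speed") orelse continue;
    std.debug.print("{s}: {s}\n", .{ model.params[0], speed.params[0] });
}
```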

## License

The code in this repository is released under the MIT license.

[zig-scfg]: https://sr.ht/~andreafeletto/zig-scfg/
[zig]: https://ziglang.org/
[scfg]: https://git.sr.ht/~emersion/scfg/

A  => ast.zig +89 -0
@@ 1,89 @@
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;

pub const Ast = struct {
    source: [:0]const u8,
    nodes: []const Node,
    root: Node,

    pub const Node = struct {
        name: []const u8,
        params: []const []const u8,
        children: []const *Node,

        pub fn get(self: *const Node, name: []const u8) ?*const Node {
            for (self.children) |child| {
                if (mem.eql(u8, name, child.name)) {
                    return child;
                }
            }
            return null;
        }

        pub fn getAll(
            self: *const Node,
            allocator: Allocator,
            name: []const u8,
        ) ![]*const Node {
            var nodes = std.ArrayList(*const Node).init(allocator);
            for (self.children) |child| {
                if (mem.eql(u8, name, child.name)) {
                    try nodes.append(child);
                }
            }
            return nodes.toOwnedSlice();
        }

        pub fn find(
            self: *const Node,
            name: []const u8,
            params: []const []const u8,
        ) ?*const Node {
            outer: for (self.children) |child| {
                if (!mem.eql(u8, name, child.name)) {
                    continue;
                }
                if (child.params.len != params.len) {
                    continue;
                }
                for (child.params) |param, i| {
                    if (!mem.eql(u8, params[i], param)) {
                        continue :outer;
                    }
                }
                return child;
            }
            return null;
        }
    };

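    /// Free all memory owned by the tree.
    /// The source string passed to `parse` is referenced, not owned, and is
    /// therefore not freed here.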
    pub fn deinit(self: *Ast, allocator: Allocator) void {
        for (self.nodes) |node| {
            allocator.free(node.params);
            allocator.free(node.children);
        }
        allocator.free(self.root.children);
        allocator.free(self.nodes);
    }

    pub fn get(self: *const Ast, name: []const u8) ?*const Node {
        return self.root.get(name);
    }

    pub fn getAll(
        self: *const Ast,
        allocator: Allocator,
        name: []const u8,
    ) ![]*const Node {
        return self.root.getAll(allocator, name);
    }

    pub fn find(
        self: *const Ast,
        name: []const u8,
        params: []const []const u8,
    ) ?*const Node {
        return self.root.find(name, params);
    }
};

A  => parser.zig +105 -0
@@ 1,105 @@
const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;

const Token = @import("tokenizer.zig").Token;

pub const Parser = struct {
    allocator: Allocator,
    source: [:0]const u8,

    state: enum { new, update },
    index: usize,

    nodes: std.ArrayListUnmanaged(struct {
        name: []const u8,
        params: std.ArrayListUnmanaged([]const u8),
        children: std.ArrayListUnmanaged(usize),
    }),
    roots: std.ArrayListUnmanaged(usize),
    path: std.ArrayListUnmanaged(usize),

    pub fn init(allocator: Allocator, source: [:0]const u8) Parser {
        return Parser{
            .allocator = allocator,
            .source = source,
            .state = .new,
            .index = 0,
            .nodes = .{},
            .roots = .{},
            .path = .{},
        };
    }

    pub fn deinit(self: *Parser) void {
        for (self.nodes.items) |*node| {
            node.params.deinit(self.allocator);
            node.children.deinit(self.allocator);
        }
        self.nodes.deinit(self.allocator);
        self.roots.deinit(self.allocator);
        self.path.deinit(self.allocator);
    }

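    /// Consume a single token.
    /// In the `.new` state the parser expects the start of a directive, a
    /// closing `}`, a blank line, or the end of input; after reading a
    /// directive name it switches to `.update`, where it collects parameters
    /// and an optional `{` that opens a block of children.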
    pub fn feed(self: *Parser, token: *const Token) !void {
        switch (self.state) {
            .new => switch (token.tag) {
                .newline => {},
                .bare_string => {
                    try self.nodes.append(self.allocator, .{
                        .name = self.source[token.loc.start..token.loc.end],
                        .params = .{},
                        .children = .{},
                    });
                    if (self.path.items.len != 0) {
                        const parent = self.path.items[self.path.items.len - 1];
                        const siblings = &self.nodes.items[parent].children;
                        try siblings.append(self.allocator, self.index);
                    } else {
                        try self.roots.append(self.allocator, self.index);
                    }
                    self.state = .update;
                    self.index += 1;
                },
                .r_brace => {
                    _ = self.path.pop();
                },
                .eof => {
                    if (self.path.items.len != 0) {
                        return error.InvalidToken;
                    }
                },
                else => {
                    return error.InvalidToken;
                },
            },
            .update => switch (token.tag) {
                .bare_string => {
                    const param = self.source[token.loc.start..token.loc.end];

                    const node = &self.nodes.items[self.index - 1];
                    try node.params.append(self.allocator, param);
                },
                // TODO: handle escape sequences in double-quoted strings
                .squote_string, .dquote_string => {
                    const start = token.loc.start + 1;
                    const end = token.loc.end - 1;
                    const param = self.source[start..end];

                    const node = &self.nodes.items[self.index - 1];
                    try node.params.append(self.allocator, param);
                },
                .l_brace => {
                    self.state = .new;
                    try self.path.append(self.allocator, self.index - 1);
                },
                .newline, .eof => {
                    self.state = .new;
                },
                else => {
                    return error.InvalidToken;
                },
            },
        }
    }
};

A  => scfg.zig +84 -0
@@ 1,84 @@
const std = @import("std");
const testing = std.testing;
const Allocator = std.mem.Allocator;

const Ast = @import("ast.zig").Ast;
const Parser = @import("parser.zig").Parser;
const Tokenizer = @import("tokenizer.zig").Tokenizer;

pub fn parse(allocator: Allocator, source: [:0]const u8) !Ast {
    var tokenizer = Tokenizer.init(source);
    var parser = Parser.init(allocator, source);
    defer parser.deinit();

    while (true) {
        const token = tokenizer.next();
        try parser.feed(&token);
        if (token.tag == .eof) {
            break;
        }
    }

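    // The parser stores nodes in a flat list and records children as indices
    // into that list; rebuild them here as pointers into `ast_nodes`.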
    const ast_nodes = try allocator.alloc(Ast.Node, parser.nodes.items.len);
    for (parser.nodes.items) |*node, i| {
        const children = try allocator.alloc(
            *Ast.Node,
            node.children.items.len,
        );
        for (node.children.items) |child, j| {
            children[j] = &ast_nodes[child];
        }
        ast_nodes[i] = .{
            .name = node.name,
            .params = node.params.toOwnedSlice(allocator),
            .children = children,
        };
    }

    const roots = try allocator.alloc(*Ast.Node, parser.roots.items.len);
    for (parser.roots.items) |root, i| {
        roots[i] = &ast_nodes[root];
    }

    return Ast{
        .source = source,
        .nodes = ast_nodes,
        .root = .{
            .name = "root",
            .params = &.{},
            .children = roots,
        },
    };
}

test {
    const source =
        \\model A2 {
        \\  speed 250
        \\  shape {
        \\    length 50
        \\    width 100
        \\  }
        \\}
        \\model C5 {
        \\  speed 350
        \\  shape {
        \\    length 10
        \\    width 260
        \\  }
        \\}
    ;

    var ast = try parse(testing.allocator, source);
    defer ast.deinit(testing.allocator);

    const models = try ast.getAll(testing.allocator, "model");
    defer testing.allocator.free(models);
    try testing.expectEqual(@as(usize, 2), models.len);

    const model_c5 = ast.find("model", &.{"C5"}).?;
    try testing.expectEqual(&ast.nodes[5], model_c5);

    const model_c5_speed = model_c5.get("speed").?.params[0];
    try testing.expectEqualStrings("350", model_c5_speed);
}

A  => tokenizer.zig +177 -0
@@ 1,177 @@
const std = @import("std");
const testing = std.testing;

pub const Token = struct {
    tag: Tag,
    loc: Loc,

    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    pub const Tag = enum {
        bare_string,
        squote_string,
        dquote_string,
        l_brace,
        r_brace,
        newline,
        eof,
        invalid,
    };
};

pub const Tokenizer = struct {
    source: [:0]const u8,
    index: usize,

    pub fn init(source: [:0]const u8) Tokenizer {
        return Tokenizer{ .source = source, .index = 0 };
    }

    const State = enum {
        start,
        bare_string,
        squote_string,
        dquote_string,
        newline,
    };

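    /// Scan and return the next token.
    /// `loc.start` and `loc.end` are byte offsets into `source`.
    /// Quoted strings keep their surrounding quotes in the span (the parser
    /// strips them), and a run of consecutive newlines produces a single
    /// `newline` token.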
    pub fn next(self: *Tokenizer) Token {
        var state: State = .start;
        var token: Token = .{
            .tag = .eof,
            .loc = .{ .start = self.index, .end = undefined },
        };

        while (true) : (self.index += 1) {
            const char = self.source[self.index];

            switch (state) {
                .start => switch (char) {
                    0 => {
                        break;
                    },
                    ' ', '\t', '\r' => {
                        token.loc.start = self.index + 1;
                    },
                    '\n' => {
                        state = .newline;
                        token.tag = .newline;
                    },
                    '\'' => {
                        state = .squote_string;
                        token.tag = .squote_string;
                    },
                    '"' => {
                        state = .dquote_string;
                        token.tag = .dquote_string;
                    },
                    'a'...'z', 'A'...'Z', '0'...'9', '_' => {
                        state = .bare_string;
                        token.tag = .bare_string;
                    },
                    '{' => {
                        self.index += 1;
                        token.tag = .l_brace;
                        token.loc.end = self.index;
                        return token;
                    },
                    '}' => {
                        self.index += 1;
                        token.tag = .r_brace;
                        token.loc.end = self.index;
                        return token;
                    },
                    else => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        self.index += 1;
                        return token;
                    },
                },
                .bare_string => switch (char) {
                    0, ' ', '\t', '\r', '\n', '{', '}' => {
                        break;
                    },
                    '"', '\'' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        self.index += 1;
                        return token;
                    },
                    else => {},
                },
                .squote_string => switch (char) {
                    '\'' => {
                        self.index += 1;
                        break;
                    },
                    0, '\n' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        return token;
                    },
                    else => {},
                },
                .dquote_string => switch (char) {
                    '"' => {
                        self.index += 1;
                        break;
                    },
                    0, '\n' => {
                        token.tag = .invalid;
                        token.loc.end = self.index;
                        return token;
                    },
                    '\\' => {
                        self.index += 1;
                    },
                    else => {},
                },
                .newline => switch (char) {
                    '\n' => {},
                    else => {
                        break;
                    },
                },
            }
        }

        token.loc.end = self.index;
        return token;
    }
};

test {
    const source =
        \\model "E5" {
        \\   max-speed 320km/h
        \\
        \\   weight '453.5t' "\""
        \\}
    ;
    var tokenizer = Tokenizer.init(source);

    const expected_tokens = [_]Token{
        .{ .tag = .bare_string, .loc = .{ .start = 0, .end = 5 } },
        .{ .tag = .dquote_string, .loc = .{ .start = 6, .end = 10 } },
        .{ .tag = .l_brace, .loc = .{ .start = 11, .end = 12 } },
        .{ .tag = .newline, .loc = .{ .start = 12, .end = 13 } },
        .{ .tag = .bare_string, .loc = .{ .start = 16, .end = 25 } },
        .{ .tag = .bare_string, .loc = .{ .start = 26, .end = 33 } },
        .{ .tag = .newline, .loc = .{ .start = 33, .end = 35 } },
        .{ .tag = .bare_string, .loc = .{ .start = 38, .end = 44 } },
        .{ .tag = .squote_string, .loc = .{ .start = 45, .end = 53 } },
        .{ .tag = .dquote_string, .loc = .{ .start = 54, .end = 58 } },
        .{ .tag = .newline, .loc = .{ .start = 58, .end = 59 } },
        .{ .tag = .r_brace, .loc = .{ .start = 59, .end = 60 } },
        .{ .tag = .eof, .loc = .{ .start = 60, .end = 60 } },
    };

    for (expected_tokens) |expected_token| {
        const token = tokenizer.next();
        try testing.expectEqual(expected_token, token);
    }
}