~coder_kalyan/femtoc

7b9c7cf639e77a6edd0d888352ce1ea3d9975c1f — Kalyan Sriram 4 months ago 6505e44 master
ast: add basic parser implementation

The parser can build a tree for the factorial sample.
5 files changed, 601 insertions(+), 20 deletions(-)

A build.zig
M src/ast.zig
M src/lex.zig
M src/main.zig
M src/parse.zig
A build.zig => build.zig +34 -0
@@ 0,0 1,34 @@
const std = @import("std");

// Build script for the femtoc compiler (generated `zig init-exe` layout,
// targeting the pre-0.11 std.build API).
pub fn build(b: *std.build.Builder) void {
    // Standard target options allows the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting supported target set are available.
    const target = b.standardTargetOptions(.{});

    // Standard release options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall.
    const mode = b.standardReleaseOptions();

    // Main compiler executable, installed by the default step.
    const exe = b.addExecutable("femtoc", "src/main.zig");
    exe.setTarget(target);
    exe.setBuildMode(mode);
    exe.install();

    // `zig build run` — builds + installs, then runs the compiler, forwarding
    // any arguments given on the command line.
    const run_cmd = exe.run();
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    // `zig build test` — compiles and runs the unit tests in src/main.zig.
    const exe_tests = b.addTest("src/main.zig");
    exe_tests.setTarget(target);
    exe_tests.setBuildMode(mode);

    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&exe_tests.step);
}

M src/ast.zig => src/ast.zig +90 -4
@@ 2,13 2,99 @@ const std = @import("std");
const lex = @import("lex.zig");
const Token = lex.Token;

const Node = struct {
pub const Error = error { InvalidNode };
pub const TokenIndex = u32;
pub const ByteOffset = u32;

pub const TokenList = std.MultiArrayList(struct {
    tag: Token.Tag,
    start: ByteOffset,
});

/// A single AST node. Nodes live in a flat MultiArrayList and refer to each
/// other by 32-bit `Index` rather than by pointer.
pub const Node = struct {
    // Which syntactic construct this node represents.
    tag: Tag,
    // Token this node is anchored to (e.g. the operator token of a bin_expr,
    // the `let` token of a const_decl).
    main_token: TokenIndex,
    // Two general-purpose operands; their meaning depends on `tag`
    // (child node indices, extra_data offsets, or undefined when unused).
    data: Data,

    pub const Data = struct {
        l: Index,
        r: Index,
    };

    // Nodes are addressed by index into the node list; 0 doubles as the
    // "null node" sentinel in the parser.
    pub const Index = u32;

    pub const Tag = enum {
        use,
        type_decl,
        const_decl,
        struct_proto,
        fn_proto,
        fn_decl,
        ret_stmt,
        block,
        add,
        sub,
        mul,
        div,
        int_lit,
        float_lit,
        ident_expr,
        call_expr,
        bin_expr,
    };

    // Half-open [start, end) range of indices, used for parameter and
    // argument lists.
    pub const Range = struct {
        start: Index,
        end: Index,
    };

    // Extra-data layout for a fn_proto node: the parameter range lives at
    // extra_data[params_start..params_end].
    pub const FnProto = struct {
        params_start: Index,
        params_end: Index,
    };

    // pub const GlobalDecl = struct {
    //     mutable: bool,
    //     ident: []u8,
    //     type_node: Index,
    //     value_node: Index,
    // };
    //
    // pub const FnProto = struct {
    //     params_start: Index,
    //     params_end: Index,
    //     return_type: Index,
    // };
};

const Ast = struct {
pub const Ast = struct {
    source: [:0]const u8,
    tokens: []const Token,
    nodes: []const Node,
    tokens: TokenList.Slice,
    nodes: std.MultiArrayList(Node).Slice,
    extra_data: []Node.Index,
    //errors: []const Error,

    /// Deserializes a struct of `Node.Index` fields from `extra_data`
    /// starting at `index`. `T` must contain only `Node.Index` fields.
    pub fn extraData(self: *Ast, index: usize, comptime T: type) T {
        const fields = std.meta.fields(T);
        var result: T = undefined;
        inline for (fields) |field, i| {
            // NOTE(review): this uses `field.type` while parse.zig's addExtra
            // uses `field.field_type`; those spellings belong to different
            // Zig versions, so only one of the two can compile — confirm the
            // targeted toolchain version.
            comptime std.debug.assert(field.type == Node.Index);
            @field(result, field.name) = self.extra_data[index + i];
        }
        return result;
    }

    /// Decodes a global declaration node into a `Node.GlobalDecl` view.
    /// NOTE(review): this looks stale — `Node.GlobalDecl` is commented out
    /// above, the Tag enum has no `.global_decl`, and `self.nodes(...)` calls
    /// a slice like a function (presumably meant `self.nodes.items(...)`).
    /// It likely only survives because nothing references it yet; verify
    /// before use.
    pub fn parseGlobalDecl(self: *Ast, index: usize) !Node.GlobalDecl {
        if (self.nodes(.tag)[index] != .global_decl) return Error.InvalidNode;

        const main_token = self.nodes(.main_token)[index];
        const data = self.nodes(.data)[index];
        // A `mut` keyword directly after the main token marks a mutable decl.
        const mutable = self.tokens[main_token + 1] == .k_mut;
        return .{
            .mutable = mutable,
            .ident = if (mutable) self.tokens[main_token + 2] else self.tokens[main_token + 1],
            .type_node = data.l,
            .value_node = data.r,
        };
    }
};

M src/lex.zig => src/lex.zig +2 -0
@@ 91,6 91,7 @@ pub const Token = struct {

        // keywords
        k_use,
        k_as,
        k_fn,
        k_assoc,
        k_return,


@@ 114,6 115,7 @@ pub const Token = struct {

    pub const keywords = std.ComptimeStringMap(Tag, .{
        .{ "use", .k_use },
        .{ "as", .k_as },
        .{ "fn", .k_fn },
        .{ "assoc", .k_assoc },
        .{ "return", .k_return },

M src/main.zig => src/main.zig +5 -14
@@ 1,5 1,5 @@
const std = @import("std");
const lex = @import("lex.zig");
const parse = @import("parse.zig");

const max_file_size = std.math.maxInt(u32);



@@ 7,7 7,7 @@ pub fn main() anyerror!void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();

    var file = try std.fs.cwd().openFile("lang.fm", .{});
    var file = try std.fs.cwd().openFile("fact.fm", .{});
    defer file.close();

    const stat = try file.stat();


@@ 23,16 23,7 @@ pub fn main() anyerror!void {
        std.process.exit(1);
    }

    var lexer = lex.Lexer.init(source);
    while (true) {
        const token = lexer.next();
        if (token.tag == .eof) {
            break;
        }

        std.debug.print("({} {}..{}) ", .{token.tag, token.loc.start, token.loc.end});
        if (token.tag == .semi) {
            std.debug.print("\n", .{});
        }
    }
    const ast = try parse.parse(allocator, source);
    std.log.debug("{any} {any}", .{ast.nodes.items(.tag), ast.nodes.items(.main_token)});
    std.log.debug("{any}", .{ast.nodes.items(.data)});
}

M src/parse.zig => src/parse.zig +470 -2
@@ 1,8 1,55 @@
const std = @import("std");
const lex = @import("lex.zig");
const ast = @import("ast.zig");

const Allocator = std.mem.Allocator;
const Token = lex.Token;
const Tokenizer = lex.Tokenizer;
const Lexer = lex.Lexer;
const Node = ast.Node;
const Ast = ast.Ast;
const TokenIndex = ast.TokenIndex;

pub const Error = error { ParseError } || Allocator.Error;
const null_node: Node.Index = 0;

/// Lexes `source` and parses it into an `Ast`. The caller owns the returned
/// Ast's token/node/extra_data buffers. Parse errors are recovered from
/// inside parseRoot; only allocation failure is reported.
pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
    var tokens = ast.TokenList{};
    defer tokens.deinit(gpa);

    // Lex the entire file up front; the parser operates on token slices.
    var lexer = Lexer.init(source);
    while (true) {
        const token = lexer.next();
        try tokens.append(gpa, .{
            .tag = token.tag,
            .start = @intCast(u32, token.loc.start),
        });
        if (token.tag == .eof) break;
    }

    var parser = Parser {
        .source = source,
        .gpa = gpa,
        .token_tags = tokens.items(.tag),
        .token_starts = tokens.items(.start),
        .index = 0,
        .nodes = .{},
        .extra_data = .{},
        .scratch = .{},
    };
    defer parser.nodes.deinit(gpa);
    defer parser.extra_data.deinit(gpa);
    // Fix: scratch was never freed, leaking whatever capacity parameter-list
    // and scope-list parsing accumulated.
    defer parser.scratch.deinit(gpa);

    parser.parseRoot();

    // toOwnedSlice transfers the buffers to the Ast and resets the lists, so
    // the defers above become no-ops for the transferred storage.
    return Ast {
        .source = source,
        .tokens = tokens.toOwnedSlice(),
        .nodes = parser.nodes.toOwnedSlice(),
        .extra_data = parser.extra_data.toOwnedSlice(gpa),
    };
}

const Parser = struct {
    gpa: Allocator,


@@ 12,5 59,426 @@ const Parser = struct {
    token_starts: []const u32,
    index: u32,

    nodes: std.ArrayListUnmanaged(Node),
    nodes: std.MultiArrayList(Node),
    extra_data: std.ArrayListUnmanaged(Node.Index),
    scratch: std.ArrayListUnmanaged(Node.Index),
    // errors: std.ArrayListUnmanaged(Ast.Error),

    /// Appends `node` to the node list and returns the index it landed at.
    fn addNode(self: *Parser, node: Node) !Node.Index {
        const index = self.nodes.len;
        try self.nodes.append(self.gpa, node);
        return @intCast(Node.Index, index);
    }

    /// Fills in the node previously reserved at position `i` and returns
    /// that position as a node index.
    fn setNode(self: *Parser, i: usize, node: Node) Node.Index {
        const index = @intCast(Node.Index, i);
        self.nodes.set(i, node);
        return index;
    }

    /// Grows the node list by one slot, tags it, and returns the slot's
    /// position. The remaining fields stay undefined until a later `setNode`
    /// fills them in; this lets a parent node precede its children in the
    /// array.
    fn reserveNode(self: *Parser, tag: ast.Node.Tag) !usize {
        try self.nodes.resize(self.gpa, self.nodes.len + 1);
        self.nodes.items(.tag)[self.nodes.len - 1] = tag;
        return self.nodes.len - 1;
    }

    /// Consumes the current token and returns its index if it matches `tag`;
    /// otherwise leaves the cursor untouched and returns null.
    fn eatToken(self: *Parser, tag: Token.Tag) ?TokenIndex {
        if (self.token_tags[self.index] != tag) return null;
        const token = self.index;
        self.index += 1;
        return token;
    }

    /// Like `eatToken`, but a mismatch is a parse error instead of null.
    fn expectToken(self: *Parser, tag: Token.Tag) Error!TokenIndex {
        return self.eatToken(tag) orelse Error.ParseError;
    }

    /// Serializes every `Node.Index` field of `extra` into `extra_data` and
    /// returns the offset of the first one (decoded by Ast.extraData).
    fn addExtra(self: *Parser, extra: anytype) Allocator.Error!Node.Index {
        const fields = std.meta.fields(@TypeOf(extra));
        try self.extra_data.ensureUnusedCapacity(self.gpa, fields.len);
        const len = @intCast(u32, self.extra_data.items.len);
        inline for (fields) |field| {
            // NOTE(review): `field.field_type` is the pre-0.11 spelling, but
            // ast.zig's extraData uses `field.type`; only one of the two can
            // compile on the targeted Zig version.
            comptime std.debug.assert(field.field_type == Node.Index);
            self.extra_data.appendAssumeCapacity(@field(extra, field.name));
        }
        return len;
    }

    /// Top-level parse loop: consumes declarations until EOF. Parse errors
    /// are swallowed (`catch null_node`) and unrecognized tokens are skipped
    /// one at a time, so the loop always makes progress and terminates.
    pub fn parseRoot(self: *Parser) void {
        while (true) {
            const tag = self.token_tags[self.index];
            const node: Node.Index = switch (tag) {
                // .k_use => self.parseUse() catch null_node,
                // .k_type => self.parseTypeDecl() catch null_node,
                .k_let => self.parseDecl() catch null_node,
                .eof => break,
                else => node: {
                    // Token cannot start a known declaration: skip it.
                    self.index += 1;
                    break :node null_node;
                },
            };

            // Root-level nodes are not collected anywhere yet.
            _ = node;
        }
    }

    fn parseDecl(p: *Parser) !Node.Index {
        const global_node = try p.reserveNode(.const_decl);

        const let_token = p.eatToken(.k_let) orelse return null_node;
        if (p.token_tags[p.index] == .k_mut) p.index += 1;
        _ = try p.expectToken(.ident);
        const type_node: Node.Index = if (p.eatToken(.colon) == null) 0 else try p.expectTypeExpr();
        // TODO: extern symbols will only be declarations
        _ = try p.expectToken(.equal);
        const value_node = try p.expectExpr();

        return p.setNode(global_node, .{
            .tag = .const_decl,
            .main_token = let_token,
            .data = .{
                .l = type_node,
                .r = value_node,
            },
        });
    }

    /// Binary-operator precedence for parseBinRExpr; higher binds tighter.
    /// Returns -1 for any non-operator token, which terminates the climb.
    fn precedence(tag: Token.Tag) i32 {
        return switch (tag) {
            // Fix: additive and multiplicative operators must each share one
            // level. The old ascending values (10/20/30/40) made `/` bind
            // tighter than `*`, so `a * b / c` parsed as `a * (b / c)` —
            // wrong under integer division.
            .plus, .minus => 10,
            .asterisk, .slash => 20,
            else => -1,
        };
    }

    /// Parses an expression: either a function literal (`fn ...`) or a
    /// primary expression optionally extended by binary operators.
    fn expectExpr(p: *Parser) !Node.Index {
        if (p.token_tags[p.index] == .k_fn) {
            return p.expectFnDecl();
        }
        const lhs = try p.expectPrimaryExpr();
        return p.parseBinRExpr(lhs, 0);
    }

    /// Parses a primary (operator-free) expression: a parenthesized
    /// expression, an identifier or call, or an int/float literal. Anything
    /// else is a parse error.
    fn expectPrimaryExpr(p: *Parser) Error!Node.Index {
        const tag = p.token_tags[p.index];
        return switch (tag) {
            .l_paren => p.expectParenExpr(),
            .ident => p.expectIdentExpr(),
            else => p.addNode(.{
                .tag = switch (tag) {
                    .int_lit => .int_lit,
                    .float_lit => .float_lit,
                    else => return Error.ParseError,
                },
                // `tag` was just read from the current token, so this eat
                // cannot fail.
                .main_token = p.eatToken(tag) orelse unreachable,
                // Literals carry no children.
                .data = undefined,
            }),
        };
    }

    /// Parses `( expr )`; the parentheses produce no node of their own.
    fn expectParenExpr(p: *Parser) !Node.Index {
        _ = try p.expectToken(.l_paren);
        // Fix: the inner expression's error was not propagated with `try`,
        // so a failed inner parse still went on to demand a closing paren
        // before the error surfaced.
        const value_node = try p.expectExpr();
        _ = try p.expectToken(.r_paren);

        return value_node;
    }

    /// Parses an identifier expression. A following `(` makes it a call
    /// expression whose data holds the argument node range; otherwise it is
    /// a bare identifier reference.
    fn expectIdentExpr(p: *Parser) !Node.Index {
        const ident_token = try p.expectToken(.ident);
        if (p.token_tags[p.index] == .l_paren) {
            std.log.debug("call", .{});
            std.log.debug("reading param list", .{});
            const params = try p.expectParamList();
            std.log.debug("read param list", .{});
            return p.addNode(.{
                .tag = .call_expr,
                .main_token = ident_token,
                // l..r is the range of node indices created while parsing the
                // arguments (see expectParamList).
                .data = .{
                    .l = params.start,
                    .r = params.end,
                },
            });
        } else {
            return p.addNode(.{
                .tag = .ident_expr,
                .main_token = ident_token,
                .data = undefined,
            });
        }
    }

    /// Parses a call argument list `( expr, expr, ... )` and returns the
    /// range of *node indices* created while parsing the arguments.
    /// NOTE(review): the range covers every node added between `(` and `)`,
    /// including subexpression nodes — not just the argument roots. Confirm
    /// downstream consumers expect that.
    fn expectParamList(p: *Parser) !Node.Range {
        _ = try p.expectToken(.l_paren);
        const params_start = p.nodes.len;
        while (true) {
            if (p.eatToken(.r_paren)) |_| break;
            // std.log.debug("next token: {any}", .{p.token_tags[p.index - 1..p.index + 2]});
            _ = try p.expectExpr();
            switch (p.token_tags[p.index]) {
                // A comma continues the list; `)` ends it on the next pass.
                .comma => _ = p.eatToken(.comma),
                .r_paren => {},
                else => return Error.ParseError,
            }
        }
        const params_end = p.nodes.len;

        return Node.Range {
            .start = @intCast(u32, params_start),
            .end = @intCast(u32, params_end),
        };
    }

    /// Operator-precedence climbing: repeatedly folds `l <op> rhs` while the
    /// next operator binds at least as tightly as `expr_precedence`,
    /// recursing when the operator after the right operand binds tighter.
    fn parseBinRExpr(p: *Parser, l: Node.Index, expr_precedence: i32) !Node.Index {
        var l_node = l;
        while (true) {
            const prec = Parser.precedence(p.token_tags[p.index]);
            // Non-operators have precedence -1, so this also exits at the
            // end of the expression.
            if (prec < expr_precedence) {
                return l_node;
            }

            // The current token is a known operator, so this eat cannot fail.
            const op_token = p.eatToken(p.token_tags[p.index]) orelse unreachable;
            var r_node = try p.expectPrimaryExpr();

            const next_prec = Parser.precedence(p.token_tags[p.index]);
            if (prec < next_prec) {
                // The next operator binds tighter: let it claim r_node first.
                r_node = try p.parseBinRExpr(r_node, prec + 1);
            }

            l_node = try p.addNode(.{
                .tag = .bin_expr,
                .main_token = op_token,
                .data = .{
                    .l = l_node,
                    .r = r_node,
                },
            });
        }
    }

    /// Parses `type ident = type-expr` into a .type_decl node anchored on
    /// the `type` keyword.
    fn parseTypeDecl(self: *Parser) !Node.Index {
        const keyword = try self.expectToken(.k_type);
        _ = try self.expectToken(.ident);
        _ = try self.expectToken(.equal);
        const aliased = try self.expectTypeExpr();

        return self.addNode(.{
            .tag = .type_decl,
            .main_token = keyword,
            .data = .{
                .l = aliased,
                .r = undefined,
            },
        });
    }

    /// Parses a type expression: a struct prototype, a fn prototype, or a
    /// named type. Named types currently produce no node (null_node).
    fn expectTypeExpr(self: *Parser) !Node.Index {
        switch (self.token_tags[self.index]) {
            .k_struct => return self.parseStructProto(),
            .k_fn => return self.expectFnProto(),
            .ident => {
                _ = self.eatToken(.ident);
                return null_node;
            },
            else => return Error.ParseError,
        }
    }

    /// Parses the start of a struct type. Incomplete: it consumes `struct {`
    /// but neither members nor the closing `}` are parsed yet (see the
    /// commented-out parseStructMembers below).
    fn parseStructProto(self: *Parser) !Node.Index {
        const struct_token = self.eatToken(.k_struct) orelse return null_node;
        _ = try self.expectToken(.l_brace);
        // const members = try self.parseStructMembers();

        return self.addNode(.{
            .tag = .struct_proto,
            .main_token = struct_token,
            .data = .{
                // .l = members,
                .l = undefined,
                .r = undefined,
            }
        });
    }

    /// Parses a function prototype `fn (params...) return-type`. The
    /// parameter range is stored out-of-line as a Node.FnProto in
    /// extra_data: data.l is its offset, data.r the return-type node.
    fn expectFnProto(p: *Parser) !Node.Index {
        const fn_token = p.eatToken(.k_fn) orelse return null_node;

        // The fn_proto node must come before its children in the array.
        const fn_proto_index = try p.reserveNode(.fn_proto);
        const params = try p.parseParamDeclList();
        const return_type = try p.expectTypeExpr();

        return p.setNode(fn_proto_index, .{
            .tag = .fn_proto,
            .main_token = fn_token,
            .data = .{
                .l = try p.addExtra(Node.FnProto {
                    .params_start = params.start,
                    .params_end = params.end,
                }),
                .r = return_type,
            },
        });
    }

    /// Parses a parameter declaration list `( ident: type, ... )`. Parameter
    /// nodes are staged in scratch, then copied into extra_data; the
    /// returned range indexes extra_data.
    fn parseParamDeclList(p: *Parser) !Node.Range {
        _ = try p.expectToken(.l_paren);
        const scratch_top = p.scratch.items.len;
        defer p.scratch.shrinkRetainingCapacity(scratch_top);

        while (true) {
            if (p.eatToken(.r_paren)) |_| break;
            const param = try p.expectParamDecl();
            if (param != null_node) try p.scratch.append(p.gpa, param);
            switch (p.token_tags[p.index]) {
                .comma => _ = p.eatToken(.comma),
                .r_paren => {},
                else => return Error.ParseError,
            }
        }

        const params = p.scratch.items[scratch_top..];
        // Fix: record where the copy lands in extra_data. The old code used
        // scratch_top (an index into the unrelated scratch list) as the
        // range start, yielding a bogus range whenever extra_data already
        // held entries or scratch had outer frames in use.
        const extra_start = p.extra_data.items.len;
        try p.extra_data.appendSlice(p.gpa, params);
        return Node.Range {
            .start = @intCast(Node.Index, extra_start),
            .end = @intCast(Node.Index, p.extra_data.items.len),
        };
    }

    /// Parses a single `ident: type` parameter; the result is its type node.
    fn expectParamDecl(p: *Parser) !Node.Index {
        _ = try p.expectToken(.ident);
        _ = try p.expectToken(.colon);
        const param_type = try p.expectTypeExpr();
        return param_type;
    }

    /// Parses a function literal: a prototype followed by a block body.
    fn expectFnDecl(p: *Parser) !Node.Index {
        const proto_node = try p.expectFnProto();
        const block_node = try p.expectBlock();

        return p.addNode(.{
            .tag = .fn_decl,
            // NOTE(review): main_token is hard-coded to token 0 rather than
            // the `fn` token; confirm nothing relies on it yet.
            .main_token = 0,
            .data = .{
                .l = proto_node,
                .r = block_node,
            },
        });
    }

    /// Parses `{ stmt* }` into a .block node whose data.l..data.r is the
    /// range of node indices created while parsing the body. The block node
    /// itself is reserved first so it precedes its children in the array.
    fn expectBlock(p: *Parser) !Node.Index {
        const block_node = try p.reserveNode(.block);
        const l_brace_token = try p.expectToken(.l_brace);

        const block_start = p.nodes.len;
        while (true) {
            // std.log.debug("next token: {any}", .{p.token_tags[p.index - 2..p.index + 1]});
            if (p.eatToken(.r_brace)) |_| break;

            switch (p.token_tags[p.index]) {
                // TODO: remove
                .l_brace => _ = try p.expectBlock(),
                else => {
                    // Skip one token when no statement was recognized, so
                    // the loop always makes progress.
                    if (try p.parseStmt() == null_node) p.index += 1;
                },
            }
            // std.log.debug("next token: {any}", .{p.token_tags[p.index - 1..p.index + 2]});
            // const param = try p.expectParamDecl();
            // if (param != null_node) try p.scratch.append(p.gpa, param);
            // switch (p.token_tags[p.index]) {
            //     .comma, .r_paren => p.index += 1,
            //     else => return Error.ParseError,
            // }
        }
        const block_end = p.nodes.len;

        return p.setNode(block_node, .{
            .tag = .block,
            .main_token = l_brace_token,
            .data = .{
                .l = @intCast(u32, block_start),
                .r = @intCast(u32, block_end),
            }
        });
    }

    /// Parses one statement (let-decl or return). Unknown leading tokens
    /// yield null_node so the caller can recover. A trailing semicolon, if
    /// present, is consumed even when the statement failed or was absent.
    fn parseStmt(p: *Parser) Error!Node.Index {
        const tag = p.token_tags[p.index];
        const node = switch (tag) {
            .k_let => p.parseDecl(),
            .k_return => p.parseRetStmt(),
            else => null_node,
        };

        _ = p.eatToken(.semi);
        return node;
    }

    /// Parses `return [expr]`. A missing or invalid value expression is
    /// treated as a bare `return`: the error is swallowed and data.l becomes
    /// null_node.
    fn parseRetStmt(p: *Parser) !Node.Index {
        const ret_token = try p.expectToken(.k_return);
        const value_node: Node.Index = p.expectExpr() catch null_node;

        return p.addNode(.{
            .tag = .ret_stmt,
            .main_token = ret_token,
            .data = .{
                .l = value_node,
                .r = undefined,
            },
        });
    }
    // fn parseStructMembers(self: *Parser) !Node.Index {
    //     while (true) {
    //         const member_token = try self.expectToken(.ident);
    //         _ = try self.expectToken(.colon);
    //         const member_type = try self.parseTypeExpr();
    //         _ = try self.expect
    //     }
    // }

    /// Parses `use a::b::c [as ident]`. Currently disabled in parseRoot.
    /// NOTE(review): `.l = scope` assigns a Node.Range to a Node.Index field
    /// — this cannot pass semantic analysis once parseUse is referenced.
    /// Also data.r stores a TokenIndex (0 meaning "no alias") in a
    /// Node.Index slot; confirm both are intentional before enabling.
    fn parseUse(self: *Parser) !Node.Index {
        const use_token = try self.expectToken(.k_use);
        const scope = try self.expectScopeList();

        // Token index 0 doubles as the "no alias" sentinel.
        const as_token = self.eatToken(.k_as) orelse 0;
        if (as_token != 0) _ = try self.expectToken(.ident);

        return self.addNode(.{
            .tag = .use,
            .main_token = use_token,
            .data = .{
                .l = scope,
                .r = as_token,
            }
        });
    }

    /// Consumes a `::`-separated identifier path.
    /// NOTE(review): this looks unfinished — nothing is ever appended to
    /// scratch, so `params` is always empty and the identifiers are not
    /// recorded anywhere; additionally the returned range's `start` indexes
    /// the scratch list while its `end` indexes extra_data. Verify intended
    /// semantics before enabling parseUse.
    fn expectScopeList(p: *Parser) !Node.Range {
        const scratch_top = p.scratch.items.len;
        defer p.scratch.shrinkRetainingCapacity(scratch_top);

        while (true) {
            _ = try p.expectToken(.ident);
            if (p.token_tags[p.index] == .colon_colon) {
                _ = p.eatToken(.colon_colon);
            } else break;
        }

        const params = p.scratch.items[scratch_top..];
        try p.extra_data.appendSlice(p.gpa, params);
        return Node.Range {
            .start = @intCast(Node.Index, scratch_top),
            .end = @intCast(Node.Index, p.extra_data.items.len),
        };
    }
};