~ntgg/zosh

615ec47cff52c89cc3058106e061921c75e00209 — Noah Graff 4 years ago 00d69d4
added readNextToken
1 files changed, 114 insertions(+), 2 deletions(-)

M src/tokenizer.zig
M src/tokenizer.zig => src/tokenizer.zig +114 -2
@@ 125,9 125,36 @@ pub const Tokenizer = struct {
    /// Convenience check: true when the tokenizer's next symbol has the
    /// given id. Delegates to nextSymbolId() for the actual lookahead.
    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
        const next_id = tokenizer.nextSymbolId();
        return next_id == symbol_id;
    }

    /// Try to consume the next token when it matches token_text exactly.
    /// Returns the text range the consumed token covers, or null when the
    /// next symbol is not a matching token. token_text must be non-empty.
    pub fn readNextToken(tokenizer: *Tokenizer, token_text: []const u8) ?TextBuffer.Range {
        std.debug.assert(token_text.len > 0);
        if (!tokenizer.isNextSymbolId(.Token)) return null;

        const range_start = tokenizer.text.pos;

        // A single non-alphabetic character (e.g. "!") is matched directly
        // against the next char; everything else is compared word-wise.
        const is_single_symbol_char =
            token_text.len == 1 and !std.ascii.isAlpha(token_text[0]);

        if (is_single_symbol_char) {
            const peeked = tokenizer.text.peekChar() orelse return null;
            if (peeked != token_text[0]) return null;
            _ = tokenizer.text.readChar();
        } else {
            const word = tokenizer.peekWord() orelse return null;
            if (!std.mem.eql(u8, word, token_text)) return null;
            _ = tokenizer.text.read(token_text.len);
        }

        return TextBuffer.Range{ .start = range_start, .end = tokenizer.text.pos };
    }
};

test "Tokenizer.nextSymbolId()" {
test "Tokenizer.nextSymbolId" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator,


@@ 206,7 233,7 @@ test "Tokenizer.nextSymbolId()" {
    t.expectEqual(Symbol.Id.EndOfFile, tokenizer.nextSymbolId());
}

test "Tokenizer.peekWord()" {
test "Tokenizer.peekWord" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator, "Some tokens");


@@ 239,6 266,91 @@ test "Tokenizer.peekWord()" {
    t.expectEqualSlices(u8, "operators", tokenizer.peekWord().?);
    _ = text.read(9);
    t.expect(null == tokenizer.peekWord());
    try text.append(
        \\Tokens
        \\Split
        \\On
        \\Lines
    );
    t.expectEqualSlices(u8, "Tokens", tokenizer.peekWord().?);
    _ = text.read(7);
    t.expectEqualSlices(u8, "Split", tokenizer.peekWord().?);
    _ = text.read(6);
    t.expectEqualSlices(u8, "On", tokenizer.peekWord().?);
    _ = text.read(3);
    t.expectEqualSlices(u8, "Lines", tokenizer.peekWord().?);
    _ = text.read(6);
}

// Exercises readNextToken: exact-match consumption, rejection of near-miss
// text (longer / shorter / wrong-case), and correct Range bookkeeping across
// appended input. The expected offsets below are byte positions into the
// accumulated buffer contents.
test "Tokenizer.readNextToken" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator,
        \\# a comment
        \\token!
    );
    defer text.deinit();

    var tokenizer = Tokenizer.init(&text);
    defer tokenizer.deinit();

    _ = tokenizer.nextSymbolId(); // skip comment
    _ = text.read(1); // skip new line
    // "# a comment\n" is 12 bytes, so "token!" (6 bytes) spans offsets 12..18
    // on line 2, columns 1..7.
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 12, .line = 2, .column = 1 },
            .end = TextBuffer.Pos{ .offset = 18, .line = 2, .column = 7 },
        },
        tokenizer.readNextToken("token!").?,
    );
    t.expectEqual(Symbol.Id.EndOfFile, tokenizer.nextSymbolId());
    // Appended text continues at offset 18, still on line 2 (no leading
    // newline in the appended literal).
    try text.append(
        \\Some more tokens
        \\split>|operators;;
    );
    // Near misses must not consume anything: too long, too short, wrong case.
    t.expect(null == tokenizer.readNextToken("Somee"));
    t.expect(null == tokenizer.readNextToken("Som"));
    t.expect(null == tokenizer.readNextToken("some"));
    // "Some" spans offsets 18..22.
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 18, .line = 2, .column = 7 },
            .end = TextBuffer.Pos{ .offset = 22, .line = 2, .column = 11 },
        },
        tokenizer.readNextToken("Some").?,
    );
    // "more" follows a single space: offsets 23..27.
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 23, .line = 2, .column = 12 },
            .end = TextBuffer.Pos{ .offset =  27, .line = 2, .column = 16},
        },
        tokenizer.readNextToken("more").?,
    );
    // "tokens" follows another space: offsets 28..34.
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 28, .line = 2, .column = 17 },
            .end = TextBuffer.Pos{ .offset = 34, .line = 2, .column = 23 },
        },
        tokenizer.readNextToken("tokens").?,
    );
    // "split" is on the next line; it must not match until the newline is
    // consumed.
    t.expect(null == tokenizer.readNextToken("split"));
    _ = text.readChar(); // skip new line
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 35, .line = 3, .column = 1 },
            .end = TextBuffer.Pos{ .offset = 40, .line = 3, .column = 6 },
        },
        tokenizer.readNextToken("split").?,
    );
    // ">|" (clobber operator) sits between "split" and "operators", so the
    // word match fails until it is skipped.
    t.expect(null == tokenizer.readNextToken("operators"));
    _ = text.read(2); // skip clobber
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 42, .line = 3, .column = 8 },
            .end = TextBuffer.Pos{ .offset = 51, .line = 3, .column = 17 },
        },
        tokenizer.readNextToken("operators").?,
    );
    // The trailing ";;" is left for the tokenizer to report as DSemi.
    t.expectEqual(Symbol.Id.DSemi, tokenizer.nextSymbolId());
}

const Operator = struct {