~ntgg/zosh

df2e1df20bf4cc779d08fa56e60effca1d2a8c4b — Noah Graff 4 years ago 44e6796
added peekWord method for Tokenizer
1 file changed, 64 insertions(+), 0 deletions(-)

M src/tokenizer.zig
M src/tokenizer.zig => src/tokenizer.zig +64 -0
@@ 96,6 96,35 @@ pub const Tokenizer = struct {
            return .EndOfFile;
        }
    }

    /// Get the text contents of the next word, if the next symbol is a
    /// token and it is a plain (unquoted, unexpanded) word.
    ///
    /// Returns null when the next symbol is not a `.Token`, or when the
    /// word contains characters that require quoting or expansion
    /// ('$', '`', '\'', '"', '\\'). The returned slice is a view into the
    /// tokenizer's text buffer and is only valid until that buffer is
    /// advanced or modified. Does not consume any input.
    pub fn peekWord(tokenizer: *Tokenizer) ?[]const u8 {
        if (!tokenizer.isNextSymbolId(.Token)) return null;

        // a symbol of type 'Token' should always have at least one
        // character left.
        // Use usize (not u8) so words of 255+ characters don't overflow
        // the counter, which would panic in safe builds.
        var word_length: usize = 1;
        while (tokenizer.text.hasRemaining(word_length + 1)) {
            const char = tokenizer.text.at(word_length);
            switch (char) {
                // a newline or ')' cleanly terminates the word
                '\n', ')' => return tokenizer.text.peek(word_length),
                // quoting/expansion characters: not a plain word
                '$', '`', '\'', '"', '\\' => return null,
                else => {},
            }

            // an operator character or a blank also terminates the word
            if (isOperatorStart(char) or std.ascii.isBlank(char)) {
                return tokenizer.text.peek(word_length);
            }

            word_length += 1;
        }
        // ran out of buffered text: the entire remainder is the word
        return tokenizer.text.peek(word_length);
    }

    /// Reports whether the upcoming symbol's id equals `symbol_id`.
    /// Does not consume any input.
    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
        const next_id = tokenizer.nextSymbolId();
        return next_id == symbol_id;
    }
};

test "Tokenizer.nextSymbolId()" {


@@ 177,6 206,41 @@ test "Tokenizer.nextSymbolId()" {
    t.expectEqual(Symbol.Id.EndOfFile, tokenizer.nextSymbolId());
}

test "Tokenizer.peekWord()" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator, "Some tokens");
    defer text.deinit();

    var tokenizer = Tokenizer.init(&text);
    defer tokenizer.deinit();
    // peekWord must be idempotent: repeated calls without consuming input
    // return the same word each time.
    t.expectEqualSlices(u8, "Some", tokenizer.peekWord().?);
    t.expectEqualSlices(u8, "Some", tokenizer.peekWord().?);
    t.expectEqualSlices(u8, "Some", tokenizer.peekWord().?);
    // consume "Some"; the blank before "tokens" makes the next symbol
    // start at the following word
    _ = text.read(4);
    t.expectEqualSlices(u8, "tokens", tokenizer.peekWord().?);
    t.expectEqualSlices(u8, "tokens", tokenizer.peekWord().?);
    t.expectEqualSlices(u8, "tokens", tokenizer.peekWord().?);
    // consume "tokens"; nothing left, so there is no word to peek
    _ = text.read(6);
    t.expect(null == tokenizer.peekWord());
    t.expect(null == tokenizer.peekWord());
    t.expect(null == tokenizer.peekWord());
    // words separated by operators ("||", "&&", "<") rather than blanks
    try text.append("more||tokens&&with<operators");
    t.expectEqualSlices(u8, "more", tokenizer.peekWord().?);
    _ = text.read(4);
    // cursor now sits on the "||" operator, which is not a word
    t.expect(null == tokenizer.peekWord());
    _ = text.read(2);
    t.expectEqualSlices(u8, "tokens", tokenizer.peekWord().?);
    _ = text.read(6);
    // cursor on "&&": again not a word
    t.expect(null == tokenizer.peekWord());
    _ = text.read(2);
    t.expectEqualSlices(u8, "with", tokenizer.peekWord().?);
    // consume "with<": the '<' operator is a single character
    _ = text.read(5);
    t.expectEqualSlices(u8, "operators", tokenizer.peekWord().?);
    _ = text.read(9);
    // buffer exhausted again
    t.expect(null == tokenizer.peekWord());
}

const Operator = struct {
    symbol_id: Symbol.Id,
    text: []const u8,