~ntgg/zosh

7a140c7254828e302efb864ebf188d3d8cfc49bb — Noah Graff 8 months ago d8bc935 master
added peekName, to peek a shell name
1 files changed, 59 insertions(+), 9 deletions(-)

M src/tokenizer.zig
M src/tokenizer.zig => src/tokenizer.zig +59 -9
@@ 97,33 97,54 @@ pub const Tokenizer = struct {
        }
    }

    /// Report whether the id of the next symbol equals `symbol_id`.
    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
        const next_id = tokenizer.nextSymbolId();
        return next_id == symbol_id;
    }

    /// get the text contents of the next word, if the next symbol is a token,
    /// and it's a valid word.
    pub fn peekWord(tokenizer: *Tokenizer) ?[]const u8 {
        if (!tokenizer.isNextSymbolId(.Token)) return null;

        // a symbol of type 'Token' should always have at least one
        // character left.
-        var word_length: u8 = 1;
+        var word_length: usize = 0;
        while (tokenizer.text.hasRemaining(word_length + 1)) {
            const char = tokenizer.text.at(word_length);
            switch (char) {
-                '\n', ')' => return tokenizer.text.peek(word_length),
+                '\n', ')' => break,
                '$', '`', '\'', '"', '\\' => return null,
                else => {},
            }

            if (isOperatorStart(char) or std.ascii.isBlank(char)) {
-                return tokenizer.text.peek(word_length);
+                break;
            }

            word_length += 1;
        }
-        return tokenizer.text.peek(word_length);
+        return if (word_length > 1) tokenizer.text.peek(word_length) else null;
    }

-    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
-        return tokenizer.nextSymbolId() == symbol_id;
+    // see: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_235

    /// Peek at a single shell name without consuming any input. A name is a
    /// word made up solely of underscores, digits, and alphabetics from the
    /// portable character set, whose first character is not a digit.
    pub fn peekName(tokenizer: *Tokenizer) ?[]const u8 {
        if (!tokenizer.isNextSymbolId(.Token)) return null;

        // a Token symbol guarantees the buffer holds at least one character
        const head = tokenizer.text.peekChar().?;
        if (std.ascii.isDigit(head)) return null;
        if (head != '_' and !std.ascii.isAlNum(head)) return null;

        // extend the name one character at a time until a non-name
        // character or the end of the buffered text is reached
        var name_len: usize = 1;
        while (tokenizer.text.hasRemaining(name_len + 1)) : (name_len += 1) {
            const c = tokenizer.text.at(name_len);
            if (c != '_' and !std.ascii.isAlNum(c)) break;
        }
        return tokenizer.text.peek(name_len);
    }

    /// Consume the next token, if it exists, and matches token_text. If it


@@ 280,6 301,35 @@ test "Tokenizer.peekWord" {
    _ = text.read(3);
    t.expectEqualSlices(u8, "Lines", tokenizer.peekWord().?);
    _ = text.read(6);
    try text.append("inval$id");
    t.expect(null == tokenizer.peekWord());
}

test "Tokenizer.peekName" {
    const t = std.testing;

    var buffer = try TextBuffer.init(std.heap.direct_allocator, "Some names");
    defer buffer.deinit();

    var tok = Tokenizer.init(&buffer);
    defer tok.deinit();

    // peeking never consumes input, so repeated peeks return the same name
    t.expectEqualSlices(u8, "Some", tok.peekName().?);
    t.expectEqualSlices(u8, "Some", tok.peekName().?);
    _ = buffer.read(4);
    t.expectEqualSlices(u8, "names", tok.peekName().?);
    t.expectEqualSlices(u8, "names", tok.peekName().?);
    _ = buffer.read(5);
    t.expect(null == tok.peekName());

    // a name ends at the first character outside [_A-Za-z0-9],
    // and may not begin with one
    try buffer.append("delimit%with@non-alpha_numerics");
    t.expectEqualSlices(u8, "delimit", tok.peekName().?);
    _ = buffer.read(8);
    t.expectEqualSlices(u8, "with", tok.peekName().?);
    _ = buffer.read(4);
    t.expect(null == tok.peekName());
    _ = buffer.readChar();
    t.expectEqualSlices(u8, "non", tok.peekName().?);
    _ = buffer.read(4);
    t.expectEqualSlices(u8, "alpha_numerics", tok.peekName().?);
}

test "Tokenizer.readNextToken" {


@@ 321,7 371,7 @@ test "Tokenizer.readNextToken" {
    t.expectEqual(
        TextBuffer.Range{
            .start = TextBuffer.Pos{ .offset = 23, .line = 2, .column = 12 },
-            .end = TextBuffer.Pos{ .offset =  27, .line = 2, .column = 16},
+            .end = TextBuffer.Pos{ .offset = 27, .line = 2, .column = 16 },
        },
        tokenizer.readNextToken("more").?,
    );