7a140c7254828e302efb864ebf188d3d8cfc49bb — Noah Graff a month ago d8bc935 master
added peekName to peek a shell name
1 file changed, 59 insertions(+), 9 deletions(-)

M src/tokenizer.zig
M src/tokenizer.zig => src/tokenizer.zig +59 -9
@@ 97,33 97,54 @@ pub const Tokenizer = struct {
         }
     }
 
+    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
+        return tokenizer.nextSymbolId() == symbol_id;
+    }
+
     /// get the text contents of the next word, if the next symbol is a token,
     /// and it's a valid word.
     pub fn peekWord(tokenizer: *Tokenizer) ?[]const u8 {
         if (!tokenizer.isNextSymbolId(.Token)) return null;
 
-        // a symbol of type 'Token' should always have at least one
-        // character left.
-        var word_length: u8 = 1;
+        var word_length: usize = 0;
         while (tokenizer.text.hasRemaining(word_length + 1)) {
             const char = tokenizer.text.at(word_length);
             switch (char) {
-                '\n', ')' => return tokenizer.text.peek(word_length),
+                '\n', ')' => break,
                 '$', '`', '\'', '"', '\\' => return null,
                 else => {},
             }
 
             if (isOperatorStart(char) or std.ascii.isBlank(char)) {
-                return tokenizer.text.peek(word_length);
+                break;
             }
 
             word_length += 1;
         }
-        return tokenizer.text.peek(word_length);
+        return if (word_length > 1) tokenizer.text.peek(word_length) else null;
     }
 
-    pub fn isNextSymbolId(tokenizer: *Tokenizer, symbol_id: Symbol.Id) bool {
-        return tokenizer.nextSymbolId() == symbol_id;
+    // see: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_235
+
+    /// Peek at a single shell name. A name is a word consisting solely of
+    /// underscores, digits, and alphabetics from the portable character set,
+    /// and the first character is not a digit.
+    pub fn peekName(tokenizer: *Tokenizer) ?[]const u8 {
+        if (!tokenizer.isNextSymbolId(.Token)) return null;
+        // text must have a character left if the next symbol is a token
+        const first_char = tokenizer.text.peekChar().?;
+        if (std.ascii.isDigit(first_char)) return null;
+        if (first_char != '_' and !std.ascii.isAlNum(first_char)) return null;
+
+        var word_length: usize = 1;
+        while (tokenizer.text.hasRemaining(word_length + 1)) {
+            const char = tokenizer.text.at(word_length);
+            if (char != '_' and !std.ascii.isAlNum(char)) {
+                break;
+            }
+            word_length += 1;
+        }
+        return tokenizer.text.peek(word_length);
     }
 
     /// Consume the next token, if it exists, and matches token_text. If it
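
The doc comments above draw a line between a word and a name; a short sketch of the difference (not part of the commit), written against the same TextBuffer/Tokenizer setup the tests in the next hunk use, with the input "foo-bar" made up for illustration:

// Sketch only, not from this commit: '-' may appear in a word but not in
// a name, so the two peeks return different slices from the same token.
test "Tokenizer.peekName vs peekWord sketch" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator, "foo-bar");
    defer text.deinit();

    var tokenizer = Tokenizer.init(&text);
    defer tokenizer.deinit();

    // neither call consumes input, so the order of the peeks does not matter
    t.expectEqualSlices(u8, "foo-bar", tokenizer.peekWord().?);
    t.expectEqualSlices(u8, "foo", tokenizer.peekName().?);
}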


@@ 280,6 301,35 @@ test "Tokenizer.peekWord" {
     _ = text.read(3);
     t.expectEqualSlices(u8, "Lines", tokenizer.peekWord().?);
     _ = text.read(6);
+    try text.append("inval$id");
+    t.expect(null == tokenizer.peekWord());
+}
+
+test "Tokenizer.peekName" {
+    const t = std.testing;
+
+    var text = try TextBuffer.init(std.heap.direct_allocator, "Some names");
+    defer text.deinit();
+
+    var tokenizer = Tokenizer.init(&text);
+    defer tokenizer.deinit();
+    t.expectEqualSlices(u8, "Some", tokenizer.peekName().?);
+    t.expectEqualSlices(u8, "Some", tokenizer.peekName().?);
+    _ = text.read(4);
+    t.expectEqualSlices(u8, "names", tokenizer.peekName().?);
+    t.expectEqualSlices(u8, "names", tokenizer.peekName().?);
+    _ = text.read(5);
+    t.expect(null == tokenizer.peekName());
+    try text.append("delimit%with@non-alpha_numerics");
+    t.expectEqualSlices(u8, "delimit", tokenizer.peekName().?);
+    _ = text.read(8);
+    t.expectEqualSlices(u8, "with", tokenizer.peekName().?);
+    _ = text.read(4);
+    t.expect(null == tokenizer.peekName());
+    _ = text.readChar();
+    t.expectEqualSlices(u8, "non", tokenizer.peekName().?);
+    _ = text.read(4);
+    t.expectEqualSlices(u8, "alpha_numerics", tokenizer.peekName().?);
 }
 
 test "Tokenizer.readNextToken" {


@@ 321,7 371,7 @@ test "Tokenizer.readNextToken" {
     t.expectEqual(
         TextBuffer.Range{
             .start = TextBuffer.Pos{ .offset = 23, .line = 2, .column = 12 },
-            .end = TextBuffer.Pos{ .offset =  27, .line = 2, .column = 16},
+            .end = TextBuffer.Pos{ .offset = 27, .line = 2, .column = 16 },
         },
         tokenizer.readNextToken("more").?,
     );
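
One case the new peekName tests leave uncovered is the leading-digit rule from the definition in the first hunk; a sketch of that check (not part of the commit, input chosen only for illustration):

// Sketch only, not from this commit: a name may start with an underscore
// but not with a digit.
test "Tokenizer.peekName leading digit sketch" {
    const t = std.testing;

    var text = try TextBuffer.init(std.heap.direct_allocator, "_9abc 1abc");
    defer text.deinit();

    var tokenizer = Tokenizer.init(&text);
    defer tokenizer.deinit();

    t.expectEqualSlices(u8, "_9abc", tokenizer.peekName().?);
    _ = text.read(5);
    // "1abc" is a valid word, but not a valid name
    t.expect(null == tokenizer.peekName());
}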