~deciduously/nlox

nlox/src/nloxpkg/scanner.nim -rw-r--r-- 5.3 KiB
0f2f20d1Ben Lovy Scanner 5 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Tokenizer

import strutils, tables
import loxerror, token

type
    # Tokenizer state: walks `source` character by character and collects the
    # tokens recognised so far.
    Scanner* = ref object of RootObj
        # Error reporter; invoked as `error.error(line, message)` on bad input.
        error: LoxError
        # Full text being scanned.
        source: string
        # Tokens appended so far by the `add_*_token` helpers.
        tokens: seq[Token]
        # Index in `source` of the first character of the lexeme in progress.
        start: int
        # Index in `source` of the next character to be consumed.
        current: int
        # Line counter; set to 1 by `new_scanner` and incremented on each '\n'.
        line: int

proc new_scanner*(source: string, error: LoxError): Scanner =
    ## Builds a scanner positioned at the very beginning of `source`.
    ##
    ## `error` is the reporter the scanner will call when it meets input it
    ## cannot tokenize. The token list is left at its default (empty) value.
    new(result)
    result.error = error
    result.source = source
    result.start = 0
    result.current = 0
    result.line = 1 # line numbers are 1-based

# Reserved words mapped to their token types. `read_identifier` looks every
# scanned word up here; anything not listed becomes a plain IDENTIFIER.
# NOTE(review): the `fun` entry assumes `TokenType` declares `FUN` -- confirm
# against the token module.
let KEYWORDS =
    {
        "and": TokenType.AND,
        "class": TokenType.CLASS,
        "else": TokenType.ELSE,
        "false": TokenType.FALSE,
        "for": TokenType.FOR,
        "fun": TokenType.FUN,
        "if": TokenType.IF,
        "nil": TokenType.NIL,
        "or": TokenType.OR,
        "print": TokenType.PRINT,
        "return": TokenType.RETURN,
        "super": TokenType.SUPER,
        "this": TokenType.THIS,
        "true": TokenType.TRUE,
        "var": TokenType.VAR,
        "while": TokenType.WHILE
    }.toTable

proc is_digit(ch: char): bool =
    ## True when `ch` is one of the ASCII decimal digits '0' through '9'.
    ch in {'0' .. '9'}

proc is_alpha(ch: char): bool =
    ## True when `ch` is an ASCII letter (either case) or an underscore.
    case ch
    of 'a' .. 'z', 'A' .. 'Z', '_': true
    else: false

proc is_alphanumeric(ch: char): bool =
    ## True when `ch` satisfies `is_alpha` or `is_digit`.
    if is_alpha(ch):
        true
    else:
        is_digit(ch)

method current_char(self: Scanner): char {.base.} =
    ## Returns the character at the scanner's current position without
    ## consuming it. Performs no end-of-input check of its own.
    let position = self.current
    result = self.source[position]

method current_text(self: Scanner): string {.base.} =
    ## Returns the slice of the source covered by the lexeme in progress:
    ## from `start` up to, but not including, `current`.
    result = self.source[self.start ..< self.current]

method is_at_end(self: Scanner): bool {.base.} =
    ## True once every character of the source has been consumed.
    result = not (self.current < self.source.len)

method advance(self: Scanner): char {.base.} =
    ## Consumes one character: moves the current position forward by one and
    ## returns the character that was just stepped over.
    inc self.current
    result = self.source[pred(self.current)]

method add_none_token(self: Scanner, token_type: TokenType) {.base.} =
    ## Appends a token of `token_type` that carries no literal value, using
    ## the lexeme in progress as its text and the current line as its line.
    let lexeme = self.current_text()
    let token = new_none_token(token_type, lexeme, self.line)
    self.tokens.add(token)

method add_literal_token(self: Scanner, token_type: TokenType, literal: Literal) {.base.} =
    ## Appends a token of `token_type` that carries `literal` as its value,
    ## using the lexeme in progress as its text and the current line as its line.
    let lexeme = self.current_text()
    let token = new_literal_token(token_type, lexeme, literal, self.line)
    self.tokens.add(token)

# Conditional consume: the position moves only when the next character is the
# one we are looking for.
method match(self: Scanner, expected: char): bool {.base.} =
    ## Returns true and consumes the next character when it equals `expected`;
    ## returns false and leaves the position untouched otherwise, including
    ## at end of input.
    result = not self.is_at_end() and self.current_char() == expected
    if result:
        inc self.current

method peek(self: Scanner): char {.base.} =
    ## Looks at the next unconsumed character without consuming it.
    ## Yields '\0' when the input is exhausted.
    result = '\0'
    if not self.is_at_end():
        result = self.current_char()

method read_string(self: Scanner) {.base.} =
    ## Scans the remainder of a string literal whose opening quote has already
    ## been consumed, and emits a STRING token holding the text between the
    ## quotes. Reports "Unterminated string." and emits nothing when the input
    ## ends before a closing quote is found.
    while not self.is_at_end() and self.peek() != '"':
        # Strings may span lines, so keep the line counter in step.
        if self.peek() == '\n':
            inc self.line
        discard self.advance()

    if self.is_at_end():
        self.error.error(self.line, "Unterminated string.")
    else:
        discard self.advance() # step over the closing quote
        # The lexeme includes both quotes; the literal value excludes them.
        let contents = self.source[(self.start + 1) ..< (self.current - 1)]
        self.add_literal_token(STRING, new_string_literal(contents))

method peek_next(self: Scanner): char {.base.} =
    ## Looks one character past the next unconsumed one without consuming
    ## anything. Yields '\0' when that position lies beyond the input.
    let next_index = self.current + 1
    if next_index < self.source.len():
        self.source[next_index]
    else:
        '\0'

method read_number(self: Scanner) {.base.} =
    ## Scans the remainder of a number literal whose first digit has already
    ## been consumed, and emits a NUMBER token holding its float value.
    ## A fractional part is accepted only when the '.' is followed by a digit.
    while self.peek().is_digit():
        discard self.advance()

    # A trailing '.' with no digit after it is not part of the number.
    let has_fraction = self.peek() == '.' and self.peek_next().is_digit()
    if has_fraction:
        discard self.advance() # consume the '.'
        while self.peek().is_digit():
            discard self.advance()

    let lexeme = self.source[self.start ..< self.current]
    let value = parseFloat(lexeme)
    self.add_literal_token(TokenType.NUMBER, new_float_literal(value))

method read_identifier(self: Scanner) {.base.} =
    ## Scans the remainder of a word whose first character has already been
    ## consumed, then emits the matching keyword token when the word is listed
    ## in `KEYWORDS`, or an IDENTIFIER token otherwise.
    while self.peek().is_alphanumeric():
        discard self.advance()

    let word = self.current_text()
    # Words absent from the keyword table fall back to IDENTIFIER.
    let token_type = KEYWORDS.getOrDefault(word, TokenType.IDENTIFIER)
    self.add_none_token(token_type)

method scan_token(self: Scanner) {.base.} =
    ## Consumes one lexeme starting at the current position and records the
    ## token it forms, if any.
    ##
    ## Spaces, tabs, carriage returns and `//` comments produce no token; a
    ## newline only increments the line counter; a character that starts no
    ## known lexeme is reported through `self.error` with the current line.
    let ch = self.advance()
    case ch
    # Single-character tokens.
    of '(': self.add_none_token(TokenType.LEFT_PAREN)
    of ')': self.add_none_token(TokenType.RIGHT_PAREN)
    of '{': self.add_none_token(TokenType.LEFT_BRACE)
    of '}': self.add_none_token(TokenType.RIGHT_BRACE)
    of ',': self.add_none_token(TokenType.COMMA)
    of '.': self.add_none_token(TokenType.DOT)
    of '-': self.add_none_token(TokenType.MINUS)
    of '+': self.add_none_token(TokenType.PLUS)
    of ';': self.add_none_token(TokenType.SEMICOLON)
    of '*': self.add_none_token(TokenType.STAR)
    # Operators that may be followed by '=' to form a two-character token.
    of '!':
        let kind = if self.match('='): BANG_EQUAL else: BANG
        self.add_none_token(kind)
    of '=':
        let kind = if self.match('='): EQUAL_EQUAL else: EQUAL
        self.add_none_token(kind)
    of '<':
        let kind = if self.match('='): LESS_EQUAL else: LESS
        self.add_none_token(kind)
    of '>':
        let kind = if self.match('='): GREATER_EQUAL else: GREATER
        self.add_none_token(kind)
    of '/':
        if not self.match('/'):
            # A lone slash is the division operator.
            self.add_none_token(SLASH)
        else:
            # "//" starts a comment that runs to the end of the line; the
            # newline itself is left for the next call so it gets counted.
            while not self.is_at_end() and self.peek() != '\n':
                discard self.advance()
    of ' ', '\r', '\t':
        discard # whitespace carries no token
    of '\n':
        inc self.line
    of '"':
        self.read_string()
    else:
        if ch.is_digit():
            self.read_number()
        elif ch.is_alpha():
            self.read_identifier()
        else:
            self.error.error(self.line, "Unexpected character")

method scan_tokens*(self: Scanner): seq[Token] {.base.} =
    ## Tokenizes everything from the current position to the end of the
    ## source, appends a final EOF token carrying the current line, and
    ## returns the scanner's accumulated token list.
    while not self.is_at_end():
        # Each pass begins a fresh lexeme at the current position.
        self.start = self.current
        self.scan_token()

    let eof = new_none_token(TokenType.EOF, "", self.line)
    self.tokens.add(eof)
    result = self.tokens