From 0f2f20d1105e10e8ffd66bb0d8c2efedd34198dc Mon Sep 17 00:00:00 2001 From: Ben Lovy Date: Sat, 26 Dec 2020 17:45:10 -0500 Subject: [PATCH] Scanner --- src/nlox.nim | 5 +- src/nloxpkg/lox.nim | 29 ++++++-- src/nloxpkg/loxerror.nim | 17 +++++ src/nloxpkg/scanner.nim | 154 +++++++++++++++++++++++++++++++++++++++ src/nloxpkg/token.nim | 69 ++++++++++++++++++ test.lox | 5 +- 6 files changed, 270 insertions(+), 9 deletions(-) create mode 100644 src/nloxpkg/loxerror.nim create mode 100644 src/nloxpkg/scanner.nim create mode 100644 src/nloxpkg/token.nim diff --git a/src/nlox.nim b/src/nlox.nim index bc0bf1b..a9210ff 100644 --- a/src/nlox.nim +++ b/src/nlox.nim @@ -9,11 +9,12 @@ let usage*: string = "Usage: nlox [script]" when isMainModule: let params = commandLineParams() let num_params = params.len() + let interpreter = new_lox() if num_params > 1: echo usage quit(64) elif num_params == 1: - run_file(params[0]) + interpreter.run_file(params[0]) else: - run_prompt() \ No newline at end of file + interpreter.run_prompt() \ No newline at end of file diff --git a/src/nloxpkg/lox.nim b/src/nloxpkg/lox.nim index 3edf77b..81335bd 100644 --- a/src/nloxpkg/lox.nim +++ b/src/nloxpkg/lox.nim @@ -1,17 +1,34 @@ #! The top-level of the Lox interpreter. +import loxerror, scanner, token + +type + Lox* = ref object of RootObj + error_logger: LoxError + +# Build a new interpreter +proc new_lox*(): Lox = + var ret = new Lox + ret.error_logger = new LoxError + ret + # Interpret Lox input, results to stdout -proc run(source: string) = - echo source +method run(self: Lox, source: string) {.base.} = + let scanner = new_scanner(source, self.error_logger) + let tokens = scanner.scan_tokens() + for token in tokens: + echo $token # Interpret a source file -proc run_file*(path: string) = - run(readFile(path)) +method run_file*(self: Lox, path: string) {.base.} = + self.run(readFile(path)) + if self.error_logger.had_error: quit(65) # Open a REPL -proc run_prompt*() = +method run_prompt*(self: Lox) {.base.} = while true: stdout.write("> ") let line = stdin.readLine() if line.len() == 0: break - run(line) \ No newline at end of file + self.run(line) + self.error_logger.had_error = false \ No newline at end of file diff --git a/src/nloxpkg/loxerror.nim b/src/nloxpkg/loxerror.nim new file mode 100644 index 0000000..a94f4b8 --- /dev/null +++ b/src/nloxpkg/loxerror.nim @@ -0,0 +1,17 @@ +#! Error logging + +type + LoxError* = ref object of RootObj + had_error*: bool + +proc new_lox_error*(): LoxError = + LoxError(had_error: false) + +# Error reporting helper +method report(self: LoxError, line: int, where: string, message: string) {.base.} = + stderr.writeLine("[line " & $line & "] Error" & where & ": " & message) + self.had_error = true + +# Error reporting with line number +method error*(self: LoxError, line: int, message: string) {.base.} = + self.report(line, "", message) diff --git a/src/nloxpkg/scanner.nim b/src/nloxpkg/scanner.nim new file mode 100644 index 0000000..126e465 --- /dev/null +++ b/src/nloxpkg/scanner.nim @@ -0,0 +1,154 @@ +# Tokenizer + +import strutils, tables +import loxerror, token + +type + Scanner* = ref object of RootObj + error: LoxError + source: string + tokens: seq[Token] + start: int + current: int + line: int + +proc new_scanner*(source: string, error: LoxError): Scanner = + Scanner( + error: error, + source: source, + start: 0, + current: 0, + line: 1 + ) + +let KEYWORDS = + { + "and": TokenType.AND, + "class": TokenType.CLASS, + "else": TokenType.ELSE, + "false": TokenType.FALSE, + "for": TokenType.FOR, + "if": TokenType.IF, + "nil": TokenType.NIL, + "or": TokenType.OR, + "print": TokenType.PRINT, + "return": TokenType.RETURN, + "super": TokenType.SUPER, + "this": TokenType.THIS, + "true": TokenType.TRUE, + "var": TokenType.VAR, + "while": TokenType.WHILE + }.toTable + +proc is_digit(ch: char): bool = + ch >= '0' and ch <= '9' + +proc is_alpha(ch: char): bool = + ch in { 'A' .. 'Z'} + { 'a' .. 'z' } or ch == '_' + +proc is_alphanumeric(ch: char): bool = + ch.is_alpha() or ch.is_digit() + +method current_char(self: Scanner): char {.base.} = + self.source[self.current] + +method current_text(self: Scanner): string {.base.} = + self.source[self.start .. self.current - 1] + +method is_at_end(self: Scanner): bool {.base.} = + self.current >= self.source.len() + +method advance(self: Scanner): char {.base.} = + self.current += 1 + self.source[self.current - 1] + +method add_none_token(self: Scanner, token_type: TokenType) {.base.} = + let text = self.current_text() + self.tokens.add(new_none_token(token_type, text, self.line)) + +method add_literal_token(self: Scanner, token_type: TokenType, literal: Literal) {.base.} = + let text = self.current_text() + self.tokens.add(new_literal_token(token_type, text, literal, self.line)) + +# Only advance if we find what we're looking for +method match(self: Scanner, expected: char): bool {.base.} = + if self.is_at_end() or self.current_char() != expected: return false + self.current += 1 + true + +method peek(self: Scanner): char {.base.} = + if self.is_at_end(): '\0' else: self.current_char() + +method read_string(self: Scanner) {.base.} = + while self.peek() != '"' and not self.is_at_end(): + if self.peek() == '\n': self.line += 1 + let _ = self.advance() # Scroll through string + if self.is_at_end(): + self.error.error(self.line, "Unterminated string.") + return + let _ = self.advance() # handle the closing quote + # trim quotes + let val = self.source[self.start + 1 .. self.current - 2] + self.add_literal_token(STRING, new_string_literal(val)) + +method peek_next(self: Scanner): char {.base.} = + if self.current + 1 >= self.source.len(): '\0' else: self.source[self.current + 1] + +method read_number(self: Scanner) {.base.} = + while is_digit(self.peek()): + let _ = self.advance() + + if self.peek() == '.' and is_digit(self.peek_next()): + let _ = self.advance() # consume the '.' + while is_digit(self.peek()): + let _ = self.advance() + self.add_literal_token(TokenType.NUMBER, new_float_literal(parseFloat(self.source[self.start .. self.current - 1]))) + +method read_identifier(self: Scanner) {.base.} = + while (self.peek().isAlphaNumeric()): + let _ = self.advance() + let text = self.current_text() + if KEYWORDS.hasKey(text): self.add_none_token(KEYWORDS[text]) + else: self.add_none_token(TokenType.IDENTIFIER) + +method scan_token(self: Scanner) {.base.} = + let ch = self.advance() + case ch: + of '(': self.add_none_token(TokenType.LEFT_PAREN) + of ')': self.add_none_token(TokenType.RIGHT_PAREN) + of '{': self.add_none_token(TokenType.LEFT_BRACE) + of '}': self.add_none_token(TokenType.RIGHT_BRACE) + of ',': self.add_none_token(TokenType.COMMA) + of '.': self.add_none_token(TokenType.DOT) + of '-': self.add_none_token(TokenType.MINUS) + of '+': self.add_none_token(TokenType.PLUS) + of ';': self.add_none_token(TokenType.SEMICOLON) + of '*': self.add_none_token(TokenType.STAR) + of '!': self.add_none_token(if self.match('='): BANG_EQUAL else: BANG) + of '=': self.add_none_token(if self.match('='): EQUAL_EQUAL else: EQUAL) + of '<': self.add_none_token(if self.match('='): LESS_EQUAL else: LESS) + of '>': self.add_none_token(if self.match('='): GREATER_EQUAL else: GREATER) + of '/': + # It's a comment + if self.match('/'): + # A comment goes until the end of the line. + while self.peek() != '\n' and not self.is_at_end(): + let _ = self.advance() + else: + # It's division + self.add_none_token(SLASH) + of ' ', '\r', '\t': return # ignore whitespace, do nothing else + of '\n': self.line += 1 + of '"': self.read_string() + else: + if ch.is_digit(): self.read_number() + elif ch.is_alpha(): self.read_identifier() + else: self.error.error(self.line, "Unexpected character") + +method scan_tokens*(self: Scanner): seq[Token] {.base.} = + while not(self.is_at_end()): + self.start = self.current + self.scan_token() + + self.tokens.add(new_none_token(TokenType.EOF, "", self.line)) + self.tokens diff --git a/src/nloxpkg/token.nim b/src/nloxpkg/token.nim new file mode 100644 index 0000000..6911e7b --- /dev/null +++ b/src/nloxpkg/token.nim @@ -0,0 +1,69 @@ +# Pairs tokens with location data. + +import options + +type + TokenType* {.pure.} = enum + # Base + LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, + COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR, + + # One or two character tokens. + BANG, BANG_EQUAL, + EQUAL, EQUAL_EQUAL, + GREATER, GREATER_EQUAL, + LESS, LESS_EQUAL, + + # Literals. + IDENTIFIER, STRING, NUMBER, + + # Keywords. + AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, + PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE, + + EOF + + Literal* = ref object of RootObj + s: Option[string] + f: Option[float] + + Token* = ref object of RootObj + token_type: TokenType + lexeme: string + literal: Option[Literal] + line: int + +proc new_string_literal*(s: string): Literal = + Literal(s: some(s), f: none(float)) + +proc new_float_literal*(f: float): Literal = + Literal(s: none(string), f: some(f)) + +proc `$`*(self: Literal): string = + if self.s.isSome(): + self.s.get() + else: + $self.f.get() + +proc new_literal_token*( + token_type: TokenType, + lexeme: string, + literal: Literal, + line: int + ): Token = + Token(token_type: token_type, lexeme: lexeme, literal: some(literal), line: line) + +proc new_none_token*( + token_type: TokenType, + lexeme: string, + line: int + ): Token = + Token(token_type: token_type, lexeme: lexeme, literal: none(Literal), line: line) + +proc `$`*(token: Token): string = + let lit = + if token.literal.isNone(): + "" + else: + $token.literal.get() + $token.token_type & " " & token.lexeme & " " & lit \ No newline at end of file diff --git a/test.lox b/test.lox index 91e60fa..90eedcb 100644 --- a/test.lox +++ b/test.lox @@ -1 +1,4 @@ -print "hello, world"; \ No newline at end of file +print "hello, world"; +var language = "lox"; +8.3 * (3 + 4) / (7 - 2) +"this" or "that" \ No newline at end of file -- 2.30.1