~mna/snow

0ce020e55c6a4575f2f83ea8f2517d2232067930 — Martin Angers 1 year, 10 months ago 2d36cd3
pkg/compiler: auto-insert semicolons, adjust tests
10 files changed, 194 insertions(+), 46 deletions(-)

M pkg/compiler/parser.go
M pkg/compiler/scanner.go
M pkg/compiler/scanner_test.go
R pkg/compiler/testdata/{fn_add.snow.notyet => fn_add.snow}
A pkg/compiler/testdata/fn_add.snow.err
A pkg/compiler/testdata/fn_add.snow.want
R pkg/compiler/testdata/{fn_naked_return.snow.notyet => fn_naked_return.snow}
A pkg/compiler/testdata/fn_naked_return.snow.err
A pkg/compiler/testdata/fn_naked_return.snow.want
M pkg/compiler/token.go
M pkg/compiler/parser.go => pkg/compiler/parser.go +29 -13
@@ 73,10 73,28 @@ func (p *parser) expect(tok token) gotoken.Pos {
	return pos
}

// expectSemi consumes the token that terminates a statement.
//
// The current token is handled as follows:
//   - ')' or '}': nothing is consumed and no error is reported, because the
//     semicolon is optional right before a closing delimiter.
//   - ';': the token is consumed.
//   - ',': an "expected ';'" error is reported, then the comma is consumed as
//     if it had been the semicolon.
//   - anything else: an "expected ';'" error is reported and the parser is
//     handed to p.advance with the stmtStart token set.
func (p *parser) expectSemi() {
	switch p.tok {
	case Rparen, Rbrace:
		// closing delimiter follows: the semicolon may be omitted

	case Semicolon:
		p.next()

	case Comma:
		// tolerate a ',' written in place of a ';', but still flag it
		p.errorExpected(p.pos, "';'")
		p.next()

	default:
		p.errorExpected(p.pos, "';'")
		p.advance(stmtStart)
	}
}

var stmtStart = map[token]bool{
	Var: true,
	Let: true,
	Fn:  true,
	Var:    true,
	Let:    true,
	Fn:     true,
	Return: true,
}

func (p *parser) advance(to map[token]bool) {


@@ 89,15 107,6 @@ func (p *parser) advance(to map[token]bool) {
}

func (p *parser) parseFile() *file {
	// TODO: scopes

	/*
		// stop if we had errors scanning the first token, likely not a valid source file.
		if p.errors.Len() != 0 {
			return nil
		}
	*/

	var stmts []stmt
	for p.tok != EOF {
		stmts = append(stmts, p.parseStmt(true))


@@ 134,7 143,11 @@ func (p *parser) parseReturnStmt() *returnStmt {
	rs := &returnStmt{
		ret: p.expect(Return),
	}
	// TODO: need auto-semicolons here, can't know if there's an expr after
	// != Rbrace so that `{ return }` is allowed on the same line
	if p.tok != Semicolon && p.tok != Rbrace {
		rs.value = p.parseExpr()
	}
	p.expectSemi()
	return rs
}



@@ 158,6 171,8 @@ func (p *parser) parseVarDef() *varDef {
		p.next()
		vd.value = p.parseExpr()
	}
	p.expectSemi()

	if vd.typ == nil && vd.value == nil {
		p.error(pos, "missing variable type or initialization")
	}


@@ 265,6 280,7 @@ func (p *parser) parseFuncDef() *fnDef {
	fd.name = p.parseIdent()
	fd.signature = p.parseFuncSig()
	fd.body = p.parseBlock()
	p.expectSemi()
	return fd
}


M pkg/compiler/scanner.go => pkg/compiler/scanner.go +58 -3
@@ 26,6 26,8 @@ type scanner struct {
	roff int  // reading offset (position after current character)
	loff int  // current line offset

	insertSemi bool

	errCount int
}



@@ 43,6 45,7 @@ func (s *scanner) init(file *gotoken.File, src []byte, errHandler func(gotoken.P
	s.off = 0
	s.roff = 0
	s.loff = 0
	s.insertSemi = false
	s.errCount = 0

	s.next()


@@ 95,12 98,34 @@ func (s *scanner) peek() byte {
	return 0
}

// scan returns the next token in the source file. It automatically inserts
// semicolons according to the following rule.
//
// If the final non-comment token of a line is one of:
//   - an identifier
//   - a basic literal (e.g. Int, String)
//   - one of the potentially standalone keywords (e.g. Return)
//   - one of the closing delimiters ')', '}'
//
// then a semicolon token is automatically inserted in place of the newline,
// comment or EOF that follows. The literal value of a Semicolon token is the
// empty string when it is inserted at EOF, ";" for an actual semicolon in the
// source file, "\n" for an automatic semicolon at a newline and "#" for an
// automatic semicolon inserted before a comment (which may or may not be
// followed by a newline, e.g. if at EOF).
//
// The way this is implemented is that when one of those tokens is scanned,
// s.insertSemi is set to true. If the next call to scan encounters a
// newline, a comment or EOF, the semicolon token is returned and
// s.insertSemi is reset to false.
func (s *scanner) scan() (pos gotoken.Pos, tok token, lit string) {
	s.skipWhitespace()

	// current token start
	pos = s.file.Pos(s.off)
	// keep current roff if a semicolon is inserted before a comment
	roff := s.roff

	insertSemi := false
	switch cur := s.cur; {
	case isLetter(cur):
		lit = s.ident()


@@ 109,9 134,11 @@ func (s *scanner) scan() (pos gotoken.Pos, tok token, lit string) {
			// keywords are longer than one letter - avoid lookup otherwise
			tok = lookupKw(lit)
		}
		insertSemi = (tok == Ident || tok == Return)

	case isDecimal(cur) || cur == '.' && isDecimal(rune(s.peek())):
		tok, lit = s.number()
		insertSemi = true

	default:
		// keywords, identifiers and numbers are done, what remains is:


@@ 123,18 150,40 @@ func (s *scanner) scan() (pos gotoken.Pos, tok token, lit string) {
		s.next() // always make progress
		switch cur {
		case -1:
			if s.insertSemi {
				s.insertSemi = false
				return pos, Semicolon, ""
			}
			tok = EOF

		case '\n':
			s.insertSemi = false
			return pos, Semicolon, "\n"

		case '#':
			if s.insertSemi {
				s.insertSemi = false
				s.cur = '#'
				s.off = s.file.Offset(pos)
				s.roff = roff
				return pos, Semicolon, "#"
			}
			tok = Comment
			lit = s.comment()

		case '"':
			tok = String
			lit = s.string()
			insertSemi = true

		case ':', ',', '(', ')', '{', '}', '+', '*', '/', '%', '=':
		case ':', ';', ',', '(', ')', '{', '}', '+', '*', '/', '%', '=':
			tok = lookupOp(string(cur))
			insertSemi = (tok == Rparen || tok == Rbrace)
			// special-case for semicolons, set the literal to be able to tell apart the
			// explicit vs implicit semicolons.
			if tok == Semicolon {
				lit = ";"
			}

		case '-':
			tok = Sub


@@ 150,9 199,12 @@ func (s *scanner) scan() (pos gotoken.Pos, tok token, lit string) {
			}
			tok = Illegal
			lit = string(cur)
			// keep current insertSemi state
			insertSemi = s.insertSemi
		}
	}

	s.insertSemi = insertSemi
	return pos, tok, lit
}



@@ 161,7 213,10 @@ func (s *scanner) ident() string {
	for isLetter(s.cur) || isDigit(s.cur) {
		s.next()
	}
	// TODO: allow single "!" or "?" at the end
	// allow single "!" or "?" at the end
	if s.cur == '!' || s.cur == '?' {
		s.next()
	}
	return string(s.src[off:s.off])
}



@@ 280,7 335,7 @@ func (s *scanner) errorf(off int, format string, args ...interface{}) {
}

func (s *scanner) skipWhitespace() {
	for s.cur == ' ' || s.cur == '\t' || s.cur == '\n' || s.cur == '\r' {
	for s.cur == ' ' || s.cur == '\t' || s.cur == '\n' && !s.insertSemi || s.cur == '\r' {
		s.next()
	}
}

M pkg/compiler/scanner_test.go => pkg/compiler/scanner_test.go +51 -3
@@ 1,6 1,7 @@
package compiler

import (
	"fmt"
	gotoken "go/token"
	"testing"
	"unicode/utf8"


@@ 19,9 20,17 @@ func TestScanner(t *testing.T) {
	}{
		{"", nil, 0},
		{
			"#",
			[]tokLit{
				{Comment, "#"},
			},
			0,
		},
		{
			"a",
			[]tokLit{
				{Ident, "a"},
				{Semicolon, ""},
			},
			0,
		},


@@ 30,6 39,7 @@ func TestScanner(t *testing.T) {
			[]tokLit{
				{Return, "return"},
				{Ident, "a"},
				{Semicolon, ""},
			},
			0,
		},


@@ 42,6 52,7 @@ func TestScanner(t *testing.T) {
				{Ident, "int"},
				{Assign, ""},
				{Int, "123"},
				{Semicolon, ""},
			},
			0,
		},


@@ 52,6 63,7 @@ func TestScanner(t *testing.T) {
				{Ident, "x"},
				{Assign, ""},
				{String, `"a\bc\001\xaF\u1234\U00012233"`},
				{Semicolon, ""},
			},
			0,
		},


@@ 64,6 76,7 @@ func TestScanner(t *testing.T) {
				{Ident, "y"},
				{Sub, ""},
				{Int, "1"},
				{Semicolon, ""},
			},
			0,
		},


@@ 77,6 90,7 @@ func TestScanner(t *testing.T) {
				{Ident, "int"},
				{Lbrace, ""},
				{Rbrace, ""},
				{Semicolon, ""},
			},
			0,
		},


@@ 84,6 98,7 @@ func TestScanner(t *testing.T) {
			string(bom) + "a",
			[]tokLit{
				{Ident, "a"},
				{Semicolon, ""},
			},
			0,
		},


@@ 91,8 106,10 @@ func TestScanner(t *testing.T) {
			string(bom) + "a#b\nc",
			[]tokLit{
				{Ident, "a"},
				{Semicolon, "#"},
				{Comment, "#b"},
				{Ident, "c"},
				{Semicolon, ""},
			},
			0,
		},


@@ 102,6 119,7 @@ func TestScanner(t *testing.T) {
				{Ident, "a"},
				{Illegal, string(bom)},
				{Ident, "b"},
				{Semicolon, ""},
			},
			1,
		},


@@ 111,6 129,7 @@ func TestScanner(t *testing.T) {
				{Ident, "a"},
				{Illegal, "\x00"},
				{Ident, "b"},
				{Semicolon, ""},
			},
			1,
		},


@@ 120,6 139,7 @@ func TestScanner(t *testing.T) {
				{Ident, "a"},
				{Illegal, string(utf8.RuneError)},
				{Ident, "b"},
				{Semicolon, ""},
			},
			2, // illegal encoding + illegal char RuneError
		},


@@ 129,6 149,7 @@ func TestScanner(t *testing.T) {
				{Ident, "a"},
				{Illegal, "$"},
				{Ident, "b"},
				{Semicolon, ""},
			},
			1,
		},


@@ 136,6 157,7 @@ func TestScanner(t *testing.T) {
			"\"ab\n",
			[]tokLit{
				{String, "\"ab"},
				{Semicolon, "\n"},
			},
			1,
		},


@@ 143,6 165,7 @@ func TestScanner(t *testing.T) {
			`"ab\`,
			[]tokLit{
				{String, `"ab\`},
				{Semicolon, ""},
			},
			2, // escape not terminated + string not terminated
		},


@@ 150,6 173,7 @@ func TestScanner(t *testing.T) {
			`"ab\xaG"`,
			[]tokLit{
				{String, `"ab\xaG"`},
				{Semicolon, ""},
			},
			1,
		},


@@ 157,6 181,7 @@ func TestScanner(t *testing.T) {
			`"ab\xa`,
			[]tokLit{
				{String, `"ab\xa`},
				{Semicolon, ""},
			},
			2, // escape + string not terminated
		},


@@ 164,12 189,34 @@ func TestScanner(t *testing.T) {
			`"\U99999999"`,
			[]tokLit{
				{String, `"\U99999999"`},
				{Semicolon, ""},
			},
			1,
		},
		{
			"a;b\nc",
			[]tokLit{
				{Ident, "a"},
				{Semicolon, ";"},
				{Ident, "b"},
				{Semicolon, "\n"},
				{Ident, "c"},
				{Semicolon, ""},
			},
			0,
		},
		{
			"a#",
			[]tokLit{
				{Ident, "a"},
				{Semicolon, "#"},
				{Comment, "#"},
			},
			0,
		},
	}
	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
	for i, c := range cases {
		t.Run(fmt.Sprintf("%d: %s", i, c.in), func(t *testing.T) {
			fset := gotoken.NewFileSet()
			file := fset.AddFile("test", -1, len(c.in))



@@ 188,7 235,8 @@ func TestScanner(t *testing.T) {
				}

				if n >= len(c.out) {
					t.Fatalf("want %d tokens, got at least %d", len(c.out), n)
					t.Errorf("%d: extraneous token: %s [%s]", n, tok, lit)
					continue
				}

				if c.out[n].tok != tok {

R pkg/compiler/testdata/fn_add.snow.notyet => pkg/compiler/testdata/fn_add.snow +0 -0
A pkg/compiler/testdata/fn_add.snow.err => pkg/compiler/testdata/fn_add.snow.err +0 -0
A pkg/compiler/testdata/fn_add.snow.want => pkg/compiler/testdata/fn_add.snow.want +16 -0
@@ 0,0 1,16 @@
file [0:48]
  fn [0:48]
    ident [add] [3:6]
    sig [6:29]
      param [7:14]
        ident [x] [7:8]
        ident [int] [10:13]
      param [15:21]
        ident [y] [15:16]
        ident [int] [18:21]
      ident [int] [26:29]
    block [30:48]
      return [34:46]
        binary [+] [41:46]
          ident [x] [41:42]
          ident [y] [45:46]

R pkg/compiler/testdata/fn_naked_return.snow.notyet => pkg/compiler/testdata/fn_naked_return.snow +0 -1
@@ 1,4 1,3 @@
fn print(x: int, y: string) {
  x + y
  return
}

A pkg/compiler/testdata/fn_naked_return.snow.err => pkg/compiler/testdata/fn_naked_return.snow.err +0 -0
A pkg/compiler/testdata/fn_naked_return.snow.want => pkg/compiler/testdata/fn_naked_return.snow.want +12 -0
@@ 0,0 1,12 @@
file [0:40]
  fn [0:40]
    ident [print] [3:8]
    sig [8:27]
      param [9:16]
        ident [x] [9:10]
        ident [int] [12:15]
      param [17:26]
        ident [y] [17:18]
        ident [string] [20:26]
    block [28:40]
      return [32:38]

M pkg/compiler/token.go => pkg/compiler/token.go +28 -26
@@ 21,19 21,20 @@ const (

	opStart
	// Operators and delimiters
	Add    // +
	Sub    // -
	Mul    // *
	Div    // /
	Mod    // %
	Assign // =
	Lparen // (
	Lbrace // {
	Comma  // ,
	Rparen // )
	Rbrace // }
	Colon  // :
	Rarrow // ->
	Add       // +
	Sub       // -
	Mul       // *
	Div       // /
	Mod       // %
	Assign    // =
	Lparen    // (
	Lbrace    // {
	Comma     // ,
	Rparen    // )
	Rbrace    // }
	Colon     // :
	Rarrow    // ->
	Semicolon // ;
	opEnd

	kwStart


@@ 55,19 56,20 @@ var tokens = [...]string{
	Float:  "float",
	String: "string",

	Add:    "+",
	Sub:    "-",
	Mul:    "*",
	Div:    "/",
	Mod:    "%",
	Assign: "=",
	Lparen: "(",
	Lbrace: "{",
	Comma:  ",",
	Rparen: ")",
	Rbrace: "}",
	Colon:  ":",
	Rarrow: "->",
	Add:       "+",
	Sub:       "-",
	Mul:       "*",
	Div:       "/",
	Mod:       "%",
	Assign:    "=",
	Lparen:    "(",
	Lbrace:    "{",
	Comma:     ",",
	Rparen:    ")",
	Rbrace:    "}",
	Colon:     ":",
	Rarrow:    "->",
	Semicolon: ";",

	Let:    "let",
	Var:    "var",