// Package parser parses input source into an abstract syntax tree (AST).
package parser
import (
"fmt"
"io/ioutil"
"strconv"
"git.sr.ht/~mna/snow/pkg/ast"
"git.sr.ht/~mna/snow/pkg/scanner"
"git.sr.ht/~mna/snow/pkg/token"
)
// ParseFiles is a helper function that parses the source files and returns the
// fileset along with the ASTs and any error encountered. The error, if non-nil,
// is guaranteed to be a scanner.ErrorList.
func ParseFiles(files ...string) (*token.FileSet, []*ast.File, error) {
if len(files) == 0 {
return nil, nil, nil
}
var p parser
res := make([]*ast.File, 0, len(files))
fs := token.NewFileSet()
for _, file := range files {
b, err := ioutil.ReadFile(file)
if err != nil {
p.errors.Add(token.Position{Filename: file}, err.Error())
continue
}
p.init(fs, file, b)
f := p.parseFile()
res = append(res, f)
}
return fs, res, p.errors.Err()
}
// parser parses source files and generates an AST.
type parser struct {
scanner scanner.Scanner
errors scanner.ErrorList
file *token.File
// current token
tok token.Token
pos token.Pos
lit string
// collected comments
cgs []*ast.CommentGroup
}
func (p *parser) init(fset *token.FileSet, filename string, src []byte) {
p.file = fset.AddFile(filename, -1, len(src))
p.scanner.Init(p.file, src, p.errors.Add)
p.cgs = nil
// advance to first token
p.next()
}
func (p *parser) next() {
p.pos, p.tok, p.lit = p.scanner.Scan()
// comments separated by an empty line are collected in distinct comment
// groups.
cg := new(ast.CommentGroup)
prevLine := p.file.Line(p.pos)
for p.tok == token.Comment {
line := p.file.Line(p.pos)
if line > prevLine+1 {
if cg != nil && len(cg.List) > 0 {
p.cgs = append(p.cgs, cg)
}
cg = new(ast.CommentGroup)
}
prevLine = line
cg.List = append(cg.List, &ast.Comment{Pound: p.pos, Text: p.lit})
p.pos, p.tok, p.lit = p.scanner.Scan()
}
if cg != nil && len(cg.List) > 0 {
p.cgs = append(p.cgs, cg)
}
}
func (p *parser) parseFile() *ast.File {
var decls []ast.Decl
for p.tok != token.EOF {
decls = append(decls, p.parseDecl("top-level declaration", declStart))
}
return &ast.File{
Comments: p.cgs,
Decls: decls,
EOF: p.pos,
}
}
func (p *parser) parseDecl(errLabel string, syncSet map[token.Token]bool) ast.Decl {
switch p.tok {
case token.Let, token.Var:
return p.parseVarDecl()
case token.At, token.Fn, token.Ref:
return p.parseFuncDecl()
case token.Struct:
return p.parseStructDecl()
case token.Interface:
return p.parseInterfaceDecl()
default:
pos := p.pos
p.errorExpected(pos, errLabel)
p.advance(syncSet)
return &ast.BadStmt{From: pos, To: p.pos}
}
}
func (p *parser) parseStmt() ast.Stmt {
switch p.tok {
case token.Return:
return p.parseReturnStmt()
case token.Lbrace:
bk := p.parseBlock()
p.expectSemi()
return bk
case token.If:
return p.parseIfStmt(true)
case token.Guard:
return p.parseGuardStmt()
// tokens that may start a simple statement
case token.Ident, token.Add, token.Sub, token.Lparen, token.Int, token.String:
return p.parseSimpleStmt()
case token.Semicolon:
if p.lit == ";" {
// explicit semicolon, empty statement, just ignore it and return nil
p.expectSemi()
return nil
}
fallthrough
default:
// otherwise, parse a declaration
return p.parseDecl("statement", stmtStart)
}
}
func (p *parser) parseSimpleStmt() ast.Stmt {
lhs := p.parseExpr()
// check if this is an assignment statement
if p.tok == token.Assign {
opPos := p.expect(token.Assign)
as := &ast.AssignStmt{
Left: lhs,
Assign: opPos,
}
as.Right = p.parseExpr()
p.expectSemi()
return as
}
// otherwise this is an expression statement
es := &ast.ExprStmt{Value: lhs}
p.expectSemi()
return es
}
func (p *parser) parseReturnStmt() *ast.ReturnStmt {
rs := &ast.ReturnStmt{
Ret: p.expect(token.Return),
}
// != Rbrace so that `{ return }` is allowed on the same line
if p.tok != token.Semicolon && p.tok != token.Rbrace {
rs.Value = p.parseExpr()
}
p.expectSemi()
return rs
}
func (p *parser) parseIfStmt(wantSemi bool) *ast.IfStmt {
is := &ast.IfStmt{
If: p.expect(token.If),
}
is.Conds = p.parseExprList(token.Lbrace)
is.Body = p.parseBlock()
if p.tok == token.Else {
is.Else = p.parseElseClause(true)
}
if wantSemi {
p.expectSemi()
}
return is
}
func (p *parser) parseGuardStmt() *ast.GuardStmt {
gs := &ast.GuardStmt{
Guard: p.expect(token.Guard),
}
gs.Conds = p.parseExprList(token.Else, token.Lbrace)
gs.Else = p.parseElseClause(false)
p.expectSemi()
return gs
}
func (p *parser) parseElseClause(allowIf bool) *ast.ElseClause {
ec := &ast.ElseClause{
Else: p.expect(token.Else),
}
if allowIf && p.tok == token.If {
ec.Stmt = p.parseIfStmt(false)
return ec
}
ec.Stmt = p.parseBlock()
return ec
}
func (p *parser) parseVarDecl() *ast.VarDecl {
vd := &ast.VarDecl{
Kw: p.tok,
KwPos: p.pos,
}
p.next()
vars := p.parseVarDefList()
if len(vars) == 0 {
p.expect(token.Ident)
}
p.expectSemi()
vd.Vars = vars
return vd
}
func (p *parser) parseVarDefList() []*ast.VarDef {
var list []*ast.VarDef
for !tokenIn(p.tok, token.EOF, token.Semicolon) {
pos := p.pos
vd := &ast.VarDef{
Name: p.parseIdent(),
}
if p.tok == token.Colon {
vd.Colon = p.expect(token.Colon)
vd.Type = p.parseType()
}
if p.tok == token.Assign {
vd.Assign = p.expect(token.Assign)
vd.Value = p.parseExpr()
}
if vd.Type == nil && vd.Value == nil {
p.error(pos, "missing variable type or initialization")
}
list = append(list, vd)
if p.tok == token.Comma {
vd.Comma = p.expect(token.Comma)
} else {
break
}
}
return list
}
func (p *parser) parseGenericIdentOrTupleIndex() ast.Expr {
if p.tok == token.Int {
// must be a base10 integer (i.e. not 0xabc nor 0b1101)
lit := p.lit
ix, err := strconv.ParseInt(lit, 10, 32)
if err == nil {
return &ast.Ident{
Name: lit,
TupleIndex: int(ix),
IdentPos: p.expect(token.Int),
}
}
}
return p.parseGenericIdent()
}
func (p *parser) parseGenericIdent() ast.Expr {
ident := p.parseIdent()
if p.tok != token.Lbrack {
return ident
}
gc := p.parseGenericClause(genericClauseArgs)
return &ast.GenericIdent{
Ident: ident,
GenericArgs: gc,
}
}
func (p *parser) parseIdent() *ast.Ident {
var name string
if p.tok == token.Ident {
name = p.lit
}
return &ast.Ident{
Name: name,
TupleIndex: -1,
IdentPos: p.expect(token.Ident),
}
}
func (p *parser) parseBasicLit() *ast.BasicLit {
var lit string
tok := token.Illegal
if tokenIn(p.tok, token.String, token.Int) {
lit = p.lit
tok = p.tok
}
return &ast.BasicLit{
Literal: tok,
LitPos: p.expect(token.String, token.Int),
Value: lit,
}
}
func (p *parser) parseType() ast.Expr {
switch p.tok {
case token.Dollar, token.Ident:
last := p.parseTypeName()
expr := last
for p.tok == token.Dot {
// only allow dotted expressions if the last type name expression
// is an Ident.
if _, ok := last.(*ast.Ident); !ok {
break
}
sel := &ast.SelectorExpr{
Left: expr,
Dot: p.expect(token.Dot),
}
last = p.parseTypeName()
sel.Sel = last
expr = sel
}
return expr
case token.Lparen:
// this could be a type in paren, a tuple or a function type
return p.parseFuncTupleOrParenType()
default:
pos := p.pos
p.errorExpected(pos, "type")
return &ast.BadExpr{From: pos, To: p.pos}
}
}
func (p *parser) parseTypeName() ast.Expr {
switch p.tok {
case token.Dollar:
// if this is a generic type name, that's the end of the type name
// (i.e. it can't be followed by ".somethingElse")
pos := p.expect(token.Dollar)
ident := p.parseIdent()
ident.IdentPos = pos
ident.Name = "$" + ident.Name
return ident
case token.Ident:
return p.parseGenericIdent()
default:
pos := p.pos
p.errorExpected(pos, "type name")
return &ast.BadExpr{From: pos, To: p.pos}
}
}
func (p *parser) parseFuncTupleOrParenType() ast.Expr {
var types []*ast.ParamDef
pos := p.pos
lparen := p.expect(token.Lparen)
for !tokenIn(p.tok, token.Rparen, token.EOF) {
pd := &ast.ParamDef{
Type: p.parseType(),
}
types = append(types, pd)
if p.tok == token.Comma {
pd.Comma = p.expect(token.Comma)
} else {
break
}
}
rparen := p.expect(token.Rparen)
// at this point everything from ( to ) has been parsed, determine
// if this is a tuple, a type in parenthesis or a func signature.
if p.tok != token.Rarrow {
switch len(types) {
case 0:
// invalid empty tuple
p.errorExpected(pos, "type")
return &ast.BadExpr{From: pos, To: p.pos}
case 1:
// this is a parenthesized type
return &ast.ParenExpr{
Lparen: lparen,
Value: types[0].Type,
Rparen: rparen,
}
default:
// this is a tuple type
return &ast.TupleType{
Lparen: lparen,
Types: types,
Rparen: rparen,
}
}
}
// at this point we expect a function type
arrow := p.expect(token.Rarrow)
return &ast.FnType{
Lparen: lparen,
Params: types,
Rparen: rparen,
Rarrow: arrow,
RetType: p.parseType(),
}
}
func (p *parser) parseExpr() ast.Expr {
return p.parseOrTest()
}
func (p *parser) parseOrTest() ast.Expr {
lhs := p.parseAndTest()
for p.tok == token.Or {
bin := &ast.BinaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
rhs := p.parseAndTest()
bin.Left = lhs
bin.Right = rhs
lhs = bin
}
return lhs
}
func (p *parser) parseAndTest() ast.Expr {
lhs := p.parseComparison()
for p.tok == token.And {
bin := &ast.BinaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
rhs := p.parseComparison()
bin.Left = lhs
bin.Right = rhs
lhs = bin
}
return lhs
}
func (p *parser) parseComparison() ast.Expr {
lhs := p.parseArithmetic()
for tokenIn(p.tok, token.Eq, token.NotEq, token.Lt, token.Lte, token.Gt, token.Gte) {
bin := &ast.BinaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
rhs := p.parseArithmetic()
bin.Left = lhs
bin.Right = rhs
lhs = bin
}
return lhs
}
func (p *parser) parseArithmetic() ast.Expr {
lhs := p.parseTerm()
for tokenIn(p.tok, token.Add, token.Sub) {
bin := &ast.BinaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
rhs := p.parseTerm()
bin.Left = lhs
bin.Right = rhs
lhs = bin
}
return lhs
}
func (p *parser) parseTerm() ast.Expr {
lhs := p.parseFactor()
for tokenIn(p.tok, token.Mul, token.Div, token.Mod) {
bin := &ast.BinaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
rhs := p.parseFactor()
bin.Left = lhs
bin.Right = rhs
lhs = bin
}
return lhs
}
func (p *parser) parseFactor() ast.Expr {
if tokenIn(p.tok, token.Add, token.Sub, token.Not) {
un := &ast.UnaryExpr{
Op: p.tok,
OpPos: p.pos,
}
p.next()
un.Right = p.parseFactor()
return un
}
return p.parseCompound()
}
func (p *parser) parseCompound() ast.Expr {
expr := p.parseAtom()
loop:
for {
switch p.tok {
case token.Lparen:
// call expression
call := &ast.CallExpr{
Left: expr,
Lparen: p.expect(token.Lparen),
}
call.Args = p.parseExprOrLabelList(token.Rparen)
call.Rparen = p.expect(token.Rparen)
expr = call
case token.Dot:
// selector expression
sel := &ast.SelectorExpr{
Left: expr,
Dot: p.expect(token.Dot),
}
sel.Sel = p.parseGenericIdentOrTupleIndex()
expr = sel
default:
break loop
}
}
return expr
}
func (p *parser) parseExprOrLabelList(stop ...token.Token) []*ast.ExprItem {
var list []*ast.ExprItem
for p.tok != token.EOF {
if tokenIn(p.tok, stop...) {
break
}
ei := &ast.ExprItem{
Expr: p.parseExpr(),
}
list = append(list, ei)
if id, ok := ei.Expr.(*ast.Ident); ok && p.tok == token.Colon {
// this is a labelled expression
ei.Label = id
ei.Colon = p.expect(token.Colon)
ei.Expr = p.parseExpr()
}
if p.tok == token.Comma {
ei.Comma = p.expect(token.Comma)
} else {
break
}
}
return list
}
func (p *parser) parseExprList(stop ...token.Token) []*ast.ExprItem {
var list []*ast.ExprItem
for p.tok != token.EOF {
if tokenIn(p.tok, stop...) {
break
}
ei := &ast.ExprItem{
Expr: p.parseExpr(),
}
list = append(list, ei)
if p.tok == token.Comma {
ei.Comma = p.expect(token.Comma)
} else {
break
}
}
return list
}
func (p *parser) parseAtom() ast.Expr {
switch p.tok {
case token.Lparen:
lparen := p.expect(token.Lparen)
values := p.parseExprList(token.Rparen)
rparen := p.expect(token.Rparen)
switch len(values) {
case 1:
return &ast.ParenExpr{
Lparen: lparen,
Value: values[0].Expr,
Rparen: rparen,
}
case 0:
pos := lparen
p.errorExpected(pos, "expression")
return &ast.BadExpr{From: pos, To: p.pos}
default:
return &ast.TupleExpr{
Lparen: lparen,
Values: values,
Rparen: rparen,
}
}
case token.Ident:
return p.parseGenericIdent()
case token.String, token.Int:
return p.parseBasicLit()
default:
pos := p.pos
p.errorExpected(pos, "expression")
return &ast.BadExpr{From: pos, To: p.pos}
}
}
func (p *parser) parseAttr() *ast.Attr {
a := &ast.Attr{
At: p.expect(token.At),
}
a.Name = p.parseIdent()
a.Lbrace = p.expect(token.Lparen)
a.Fields = p.parseKeyValueList()
a.Rbrace = p.expect(token.Rparen)
p.expectSemi()
return a
}
func (p *parser) parseKeyValueList() []*ast.KeyValue {
var list []*ast.KeyValue
for !tokenIn(p.tok, token.Rparen, token.EOF) {
kv := &ast.KeyValue{
Key: p.parseIdent(),
}
kv.Colon = p.expect(token.Colon)
kv.Value = p.parseBasicLit()
list = append(list, kv)
if p.tok == token.Comma {
kv.Comma = p.expect(token.Comma)
} else {
break
}
}
return list
}
func (p *parser) parseFuncDecl() *ast.FnDecl {
var attrs []*ast.Attr
for p.tok == token.At {
attrs = append(attrs, p.parseAttr())
}
fd := &ast.FnDecl{Attrs: attrs}
if p.tok == token.Ref {
fd.Ref = p.expect(token.Ref)
}
fd.Fn = p.expect(token.Fn)
fd.Name = p.parseIdent()
if p.tok == token.Lbrack {
fd.GenericParams = p.parseGenericClause(genericClauseParams)
}
fd.Signature = p.parseFuncSig()
// stop here if this is a function declaration (no body)
if p.tok != token.Lbrace {
p.expectSemi()
return fd
}
// continue with function body (function definition)
fd.Body = p.parseBlock()
p.expectSemi()
return fd
}
func (p *parser) parseBlock() *ast.Block {
lbrace := p.expect(token.Lbrace)
stmts := p.parseStmtList()
rbrace := p.expect(token.Rbrace)
return &ast.Block{
Lbrace: lbrace,
Stmts: stmts,
Rbrace: rbrace,
}
}
func (p *parser) parseStmtList() []ast.Stmt {
var list []ast.Stmt
for !tokenIn(p.tok, token.Rbrace, token.EOF) {
// ignore empty statements
if stmt := p.parseStmt(); stmt != nil {
list = append(list, stmt)
}
}
return list
}
func (p *parser) parseFuncSig() *ast.FnSig {
sig := &ast.FnSig{
Lparen: p.expect(token.Lparen),
}
sig.Params = p.parseParamsList()
sig.Rparen = p.expect(token.Rparen)
if p.tok == token.Rarrow {
sig.Rarrow = p.expect(token.Rarrow)
sig.RetType = p.parseType()
}
return sig
}
func (p *parser) parseParamsList() []*ast.ParamDef {
var params []*ast.ParamDef
for !tokenIn(p.tok, token.Rparen, token.EOF) {
arg := &ast.ParamDef{
Name: p.parseIdent(),
}
arg.Colon = p.expect(token.Colon)
arg.Type = p.parseType()
params = append(params, arg)
if p.tok == token.Comma {
arg.Comma = p.expect(token.Comma)
} else {
break
}
}
return params
}
func (p *parser) parseStructDecl() *ast.StructDecl {
str := &ast.StructDecl{
Struct: p.expect(token.Struct),
}
str.Name = p.parseIdent()
if p.tok == token.Lbrack {
str.GenericParams = p.parseGenericClause(genericClauseParams)
}
str.Lbrace = p.expect(token.Lbrace)
var decls []ast.Decl
for !tokenIn(p.tok, token.Rbrace, token.EOF) {
decls = append(decls, p.parseDecl("declaration", declStart))
}
str.Decls = decls
str.Rbrace = p.expect(token.Rbrace)
p.expectSemi()
return str
}
func (p *parser) parseInterfaceDecl() *ast.InterfaceDecl {
id := &ast.InterfaceDecl{
Interface: p.expect(token.Interface),
}
id.Name = p.parseIdent()
if p.tok == token.Lbrack {
id.GenericParams = p.parseGenericClause(genericClauseParams)
}
id.Lbrace = p.expect(token.Lbrace)
var methods []*ast.FnDecl
for !tokenIn(p.tok, token.Rbrace, token.EOF) {
fn := p.parseFuncDecl()
if len(fn.Attrs) > 0 {
p.error(fn.Pos(), "interface method cannot have attributes")
}
if fn.Ref.IsValid() {
p.error(fn.Pos(), "interface method cannot be marked as ref")
}
if fn.GenericParams != nil {
p.error(fn.Pos(), "interface method cannot have a generic clause (the interface itself can)")
}
if fn.Body != nil {
p.error(fn.Pos(), "interface method cannot have a body")
}
methods = append(methods, fn)
}
id.Methods = methods
id.Rbrace = p.expect(token.Rbrace)
p.expectSemi()
return id
}
type genericClauseMode int
const (
genericClauseParams genericClauseMode = iota
genericClauseArgs
)
func (p *parser) parseGenericClause(mode genericClauseMode) *ast.GenericClause {
gc := &ast.GenericClause{
Lbrack: p.expect(token.Lbrack),
}
var types []*ast.ParamDef
for !tokenIn(p.tok, token.Rbrack, token.EOF) {
var name *ast.Ident
var typ ast.Expr
switch mode {
case genericClauseParams:
pos := p.expect(token.Dollar)
name = p.parseIdent()
name.IdentPos = pos
name.Name = "$" + name.Name
case genericClauseArgs:
typ = p.parseType()
default:
panic(fmt.Sprintf("invalid genericClauseMode: %d", mode))
}
pd := &ast.ParamDef{
Name: name,
Type: typ,
}
types = append(types, pd)
if p.tok == token.Comma {
pd.Comma = p.expect(token.Comma)
} else {
break
}
}
gc.Types = types
gc.Rbrack = p.expect(token.Rbrack)
return gc
}
func (p *parser) error(pos token.Pos, msg string) {
lpos := p.file.Position(pos)
p.errors.Add(lpos, msg)
}
func (p *parser) errorExpected(pos token.Pos, msg string) {
msg = "expected " + msg
if pos == p.pos {
// the error happened at the current position;
// make the error message more specific
switch {
case p.tok.Literal():
// print 123 rather than 'INT', etc.
msg += ", found " + p.lit
default:
msg += ", found '" + p.tok.String() + "'"
}
}
p.error(pos, msg)
}
func (p *parser) expect(toks ...token.Token) token.Pos {
pos := p.pos
var lbl string
var ok bool
for i, tok := range toks {
if p.tok == tok {
ok = true
break
}
if i > 0 {
lbl += ", "
}
lbl += "'" + tok.String() + "'"
}
if !ok {
if len(toks) > 1 {
lbl = "one of " + lbl
}
p.errorExpected(pos, lbl)
}
p.next() // make progress
return pos
}
func tokenIn(t token.Token, toks ...token.Token) bool {
for _, tok := range toks {
if t == tok {
return true
}
}
return false
}
func (p *parser) expectSemi() {
// semicolon is optional before a closing ')' or '}'
if p.tok != token.Rparen && p.tok != token.Rbrace {
switch p.tok {
case token.Comma:
// properly parse a ',' instead of a ';' but add error
p.errorExpected(p.pos, "';'")
fallthrough
case token.Semicolon:
p.next()
default:
p.errorExpected(p.pos, "';'")
p.advance(stmtStart)
}
}
}
// sets of tokens that can be sync'd to (in a call to p.advance)
var (
declStart = map[token.Token]bool{
token.Var: true,
token.Let: true,
token.Fn: true,
token.Struct: true,
}
stmtStart = map[token.Token]bool{
token.Var: true,
token.Let: true,
token.Fn: true,
token.Struct: true,
token.Return: true,
token.If: true,
token.Guard: true,
}
)
func (p *parser) advance(to map[token.Token]bool) {
for p.tok != token.EOF {
if to[p.tok] {
return
}
p.next()
}
}