@@ 5,6 5,7 @@ package scanner
import (
"fmt"
+ "go/scanner"
"unicode"
"unicode/utf8"
@@ 18,13 19,17 @@ const (
eof = -1
)
+// ErrorList is a list of scanner errors. It wraps go/scanner.ErrorList by
+// embedding it, so the embedded list's methods (such as Add) are reachable
+// through an ErrorList.
+type ErrorList struct{ scanner.ErrorList }
+
// A Scanner holds the scanner's internal state while processing a
// given input. It can be allocated as part of another data structure
// but must be initialized via Init before use.
type Scanner struct {
- // immutable fields
+ // immutable fields outside of Init calls
file *token.File
src []byte
+ errs *ErrorList
// mutable state
rn rune // current rune
@@ 40,7 45,7 @@ type Scanner struct {
// size.
func (s *Scanner) Init(file *token.File, src []byte) {
if file.Size() != len(src) {
- panic(fmt.Sprintf("file size does not match input size: %d bytes vs %d bytes", file.Size(), len(src)))
+ panic(fmt.Sprintf("%s: file size does not match input size: %d bytes vs %d bytes", file.Name(), file.Size(), len(src)))
}
s.file = file
@@ 78,14 83,14 @@ func (s *Scanner) advance() {
rn, sz := rune(s.src[s.nextpos]), 1
switch {
case rn == 0:
- // TODO: error, illegal NUL character
+ s.error(s.curpos, "illegal character NUL")
case rn >= utf8.RuneSelf:
// not ascii
rn, sz = utf8.DecodeRune(s.src[s.nextpos:])
if rn == utf8.RuneError && sz == 1 {
- // TODO: illegal utf8 encoding
+ s.error(s.curpos, "illegal UTF-8 encoding")
} else if rn == bom && s.curpos > 0 {
- // TODO: illegal BOM mark
+ s.error(s.curpos, "illegal byte-order mark")
}
}
@@ 93,6 98,11 @@ func (s *Scanner) advance() {
s.rn = rn
}
+// error records msg in s.errs. The offset is converted to a position via
+// s.file (Pos, then Position) so the recorded error carries that position.
+//
+// NOTE(review): s.errs is a pointer field; confirm that it is set to a
+// non-nil list before any scanning method can call error.
+func (s *Scanner) error(offset int, msg string) {
+ fpos := s.file.Position(s.file.Pos(offset))
+ s.errs.Add(fpos, msg)
+}
+
func (s *Scanner) peek() byte {
if s.nextpos < len(s.src) {
return s.src[s.nextpos]
@@ 144,7 154,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
case '<':
if s.rn != '-' {
- // TODO: Illegal, report error
+ s.error(s.curpos-1, fmt.Sprintf("incomplete arrow symbol: illegal character %#U", s.rn))
break
}
s.advance()
@@ 169,10 179,18 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
lit = s.scanCoderef()
case '$':
+ if s.rn != '{' {
+ s.error(s.curpos-1, fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn))
+ break
+ }
tok = token.StateCoderef
lit = s.scanCoderef()
case '@':
+ // '@' starts an action coderef (token.ActionCoderef) and must be
+ // followed by '{'; anything else is reported at the '@' itself.
+ if s.rn != '{' {
+ s.error(s.curpos-1, fmt.Sprintf("invalid action coderef: illegal character %#U", s.rn))
+ break
+ }
tok = token.ActionCoderef
lit = s.scanCoderef()
@@ 182,7 200,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
default:
// s.advance already reports bom, ignore it here
if rn != bom {
- // TODO: report illegal char
+ s.error(s.file.Offset(pos.Pos), fmt.Sprintf("illegal character %#U", rn))
}
tok = token.Illegal
lit = string(rn)
@@ 212,6 230,7 @@ func (s *Scanner) scanCharClass() string {
func (s *Scanner) scanCoderef() string {
// when called, s.rn is on the start '{'
+ start := s.curpos
s.advance()
s.skipWhitespace()
@@ 221,10 240,10 @@ func (s *Scanner) scanCoderef() string {
if s.rn == '}' {
s.advance()
} else {
- // TODO: error, unclosed coderef
+ s.error(start, "coderef not terminated")
}
if ident == "" {
- // TODO: error, empty coderef
+ s.error(start, "coderef missing identifier")
}
return ident
}