~mna/fastpeg

548de8031fc813375c31144ad2d22d28de80a726 — Martin Angers 5 years ago 6187915
internal/bootstrap/scanner: support error collection
1 files changed, 28 insertions(+), 9 deletions(-)

M internal/bootstrap/scanner/scanner.go
M internal/bootstrap/scanner/scanner.go => internal/bootstrap/scanner/scanner.go +28 -9
@@ 5,6 5,7 @@ package scanner

import (
	"fmt"
	"go/scanner"
	"unicode"
	"unicode/utf8"



@@ 18,13 19,17 @@ const (
	eof = -1
)

// ErrorList is a list of errors. It embeds the go/scanner ErrorList,
// so the methods of that type are promoted onto this one.
type ErrorList struct {
	scanner.ErrorList
}

// A Scanner holds the scanner's internal state while processing a
// given input. It can be allocated as part of another data structure
// but must be initialized via Init before use.
type Scanner struct {
	// immutable fields
	// immutable fields outside of Init calls
	file *token.File
	src  []byte
	errs *ErrorList

	// mutable state
	rn      rune // current rune


@@ 40,7 45,7 @@ type Scanner struct {
// size.
func (s *Scanner) Init(file *token.File, src []byte) {
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size does not match input size: %d bytes vs %d bytes", file.Size(), len(src)))
		panic(fmt.Sprintf("%s: file size does not match input size: %d bytes vs %d bytes", file.Name(), file.Size(), len(src)))
	}

	s.file = file


@@ 78,14 83,14 @@ func (s *Scanner) advance() {
	rn, sz := rune(s.src[s.nextpos]), 1
	switch {
	case rn == 0:
		// TODO: error, illegal NUL character
		s.error(s.curpos, "illegal character NUL")
	case rn >= utf8.RuneSelf:
		// not ascii
		rn, sz = utf8.DecodeRune(s.src[s.nextpos:])
		if rn == utf8.RuneError && sz == 1 {
			// TODO: illegal utf8 encoding
			s.error(s.curpos, "illegal UTF-8 encoding")
		} else if rn == bom && s.curpos > 0 {
			// TODO: illegal BOM mark
			s.error(s.curpos, "illegal byte-order mark")
		}
	}



@@ 93,6 98,11 @@ func (s *Scanner) advance() {
	s.rn = rn
}

// error records msg in the scanner's error list. The byte offset is
// converted to a position through the scanner's file before the error
// is added.
func (s *Scanner) error(offset int, msg string) {
	p := s.file.Pos(offset)
	position := s.file.Position(p)
	s.errs.Add(position, msg)
}

func (s *Scanner) peek() byte {
	if s.nextpos < len(s.src) {
		return s.src[s.nextpos]


@@ 144,7 154,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {

		case '<':
			if s.rn != '-' {
				// TODO: Illegal, report error
				s.error(s.curpos-1, fmt.Sprintf("incomplete arrow symbol: illegal character %#U", s.rn))
				break
			}
			s.advance()


@@ 169,10 179,18 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
			lit = s.scanCoderef()

		case '$':
			if s.rn != '{' {
				s.error(s.curpos-1, fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn))
				break
			}
			tok = token.StateCoderef
			lit = s.scanCoderef()

		case '@':
			if s.rn != '{' {
				s.error(s.curpos-1, fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn))
				break
			}
			tok = token.ActionCoderef
			lit = s.scanCoderef()



@@ 182,7 200,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
		default:
			// s.advance already reports bom, ignore it here
			if rn != bom {
				// TODO: report illegal char
				s.error(s.file.Offset(pos.Pos), fmt.Sprintf("illegal character %#U", rn))
			}
			tok = token.Illegal
			lit = string(rn)


@@ 212,6 230,7 @@ func (s *Scanner) scanCharClass() string {

func (s *Scanner) scanCoderef() string {
	// when called, s.rn is on the start '{'
	start := s.curpos
	s.advance()

	s.skipWhitespace()


@@ 221,10 240,10 @@ func (s *Scanner) scanCoderef() string {
	if s.rn == '}' {
		s.advance()
	} else {
		// TODO: error, unclosed coderef
		s.error(start, "coderef not terminated")
	}
	if ident == "" {
		// TODO: error, empty coderef
		s.error(start, "coderef missing identifier")
	}
	return ident
}