~mna/fastpeg

d8fadd0cb30377c98f5f69b5c3603b436149a45d — Martin Angers 9 months ago 866d588
internal/bootstrap/scanner: test scanner
M internal/bootstrap/scanner/scanner.go => internal/bootstrap/scanner/scanner.go +15 -10
@@ 22,7 22,7 @@ const (
)

// ErrorList is a list of errors.
type ErrorList struct{ scanner.ErrorList }
type ErrorList = scanner.ErrorList

// A Scanner holds the scanner's internal state while processing a
// given input. It can be allocated as part of another data structure


@@ 52,6 52,11 @@ func (s *Scanner) Init(file *token.File, src []byte) {

	s.file = file
	s.src = src
	if s.errs == nil {
		// TODO: probably needs to be passed by the parser
		s.errs = new(ErrorList)
	}

	s.rn = 0
	s.curpos = 0
	s.nextpos = 0


@@ 64,6 69,12 @@ func (s *Scanner) Init(file *token.File, src []byte) {
	}
}

// Err returns the accumulated scanning error(s), if any, or nil
// when no error was recorded. The result comes from the scanner's
// ErrorList (an alias of go/scanner's ErrorList).
func (s *Scanner) Err() error {
	err := s.errs.Err()
	return err
}

// advance the scanner to the next rn in the src.
// rn == -1 on EOF.
func (s *Scanner) advance() {


@@ 74,6 85,7 @@ func (s *Scanner) advance() {
			s.file.AddLine(s.linepos)
		}
		s.rn = eof
		return
	}

	s.curpos = s.nextpos


@@ 108,13 120,6 @@ func (s *Scanner) error(offset int, msg string) {
	s.errs.Add(fpos, msg)
}

// peek returns the byte at the next read position without advancing
// the scanner. It returns 0 once the end of src has been reached.
func (s *Scanner) peek() byte {
	if s.nextpos >= len(s.src) {
		return 0
	}
	return s.src[s.nextpos]
}

func (s *Scanner) skipWhitespace() {
	for s.rn == ' ' || s.rn == '\t' || s.rn == '\n' || s.rn == '\r' {
		s.advance()


@@ 136,7 141,7 @@ func (s *Scanner) skipWhitespace() {
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
	s.skipWhitespace()

	pos = token.Pos{Pos: s.file.Pos(s.curpos)}
	pos = s.file.Pos(s.curpos)
	switch rn := s.rn; {
	case isLetter(rn):
		tok = token.Identifier


@@ 205,7 210,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
		default:
			// s.advance already reports bom, ignore it here
			if rn != bom {
				s.error(s.file.Offset(pos.Pos), fmt.Sprintf("illegal character %#U", rn))
				s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", rn))
			}
			tok = token.Illegal
			lit = string(rn)

A internal/bootstrap/scanner/scanner_test.go => internal/bootstrap/scanner/scanner_test.go +107 -0
@@ 0,0 1,107 @@
package scanner

import (
	"testing"

	"git.sr.ht/~mna/fastpeg/internal/bootstrap/token"
	"github.com/stretchr/testify/require"
)

// tuple pairs a scanned token with its literal text; the tests
// compare slices of tuples against the scanner's actual output.
type tuple struct {
	tok token.Token
	lit string
}

// TestScanner_Tokens checks that the scanner yields the expected
// sequence of token/literal pairs for a sample covering identifiers,
// literals, character classes, the arrow variants, separators,
// code-reference forms, and single-character punctuation tokens.
func TestScanner_Tokens(t *testing.T) {
	input := `
abc "" 'def'
[] <- ←⟵=
| /     &
&{a} ${ _1 } @{ _-_ }
. : ; ! ? * + ()
"'x'" '"y"' [z]
`

	// Expected tokens, in scan order. Punctuation tokens carry an
	// empty literal; coderef tokens carry only their inner content.
	output := []tuple{
		{token.Identifier, "abc"},
		{token.Literal, `""`},
		{token.Literal, `'def'`},
		{token.CharClass, `[]`},
		{token.Arrow, `<-`},
		{token.Arrow, `←`},
		{token.Arrow, `⟵`},
		{token.Arrow, `=`},
		{token.Separator, `|`},
		{token.Separator, `/`},
		{token.Ampersand, ``},
		{token.PredCoderef, `a`},
		{token.StateCoderef, `_1`},
		{token.ActionCoderef, `_-_`},
		{token.Dot, ``},
		{token.Colon, ``},
		{token.Semicolon, ``},
		{token.Exclamation, ``},
		{token.Question, ``},
		{token.Star, ``},
		{token.Plus, ``},
		{token.Lparen, ``},
		{token.Rparen, ``},
		{token.Literal, `"'x'"`},
		{token.Literal, `'"y"'`},
		{token.CharClass, `[z]`},
	}

	var s Scanner
	result := scanAll(&s, input)
	require.Equal(t, output, result)
	// No errors should have been recorded for valid input.
	require.NoError(t, s.Err())
}

// TestScanner_Escapes checks that escape sequences inside literals
// and character classes are scanned verbatim (kept unprocessed in the
// literal text): simple escapes, hex, unicode (\u, \U), class-only
// escapes (\], \[, \-, \^) and unicode-property escapes (\p, \P).
func TestScanner_Escapes(t *testing.T) {
	input := `
"\a\b\f\n\r\t\v\\\""
'\a\b\f\n\r\t\v\\\''
"\x00\x10\xa1\xAF\xff"
'\x00\x10\xA1\xaf\xff'
[\x00\x10\xA1\xaf\xff]
"\u0123\uabcd\uEFef\U00045678"
'\u0123\uabcd\uEFef\U00045678'
[\u0123\uabcd\uEFef\U00045678]
[\a\b\f\n\r\t\v\\\]\[\-\^]
[\pC\PL\p{X}\P{Latin}]
`

	// Each expected literal is the exact source text, escapes included.
	output := []tuple{
		{token.Literal, `"\a\b\f\n\r\t\v\\\""`},
		{token.Literal, `'\a\b\f\n\r\t\v\\\''`},
		{token.Literal, `"\x00\x10\xa1\xAF\xff"`},
		{token.Literal, `'\x00\x10\xA1\xaf\xff'`},
		{token.CharClass, `[\x00\x10\xA1\xaf\xff]`},
		{token.Literal, `"\u0123\uabcd\uEFef\U00045678"`},
		{token.Literal, `'\u0123\uabcd\uEFef\U00045678'`},
		{token.CharClass, `[\u0123\uabcd\uEFef\U00045678]`},
		{token.CharClass, `[\a\b\f\n\r\t\v\\\]\[\-\^]`},
		{token.CharClass, `[\pC\PL\p{X}\P{Latin}]`},
	}

	var s Scanner
	result := scanAll(&s, input)
	require.Equal(t, output, result)
	// No errors should have been recorded for valid input.
	require.NoError(t, s.Err())
}

// scanAll initializes s over input (registered in a fresh FileSet
// under the name "test") and scans until EOF, collecting every
// token/literal pair in order.
func scanAll(s *Scanner, input string) []tuple {
	fs := token.NewFileSet()
	file := fs.AddFile("test", -1, len(input))
	s.Init(file, []byte(input))

	var toks []tuple
	for {
		_, tok, lit := s.Scan()
		if tok == token.EOF {
			return toks
		}
		toks = append(toks, tuple{tok, lit})
	}
}

M internal/bootstrap/token/token.go => internal/bootstrap/token/token.go +9 -4
@@ 12,27 12,32 @@ import (
type (
	// A File is a handle for a file belonging to a FileSet.
	// A File has a name, size, and line offset table.
	File struct{ token.File }
	File = token.File

	// A FileSet represents a set of source files. Methods of
	// file sets are synchronized; multiple goroutines may invoke
	// them concurrently.
	FileSet struct{ token.FileSet }
	FileSet = token.FileSet

	// Pos is a compact encoding of a source position within a file set.
	// It can be converted into a Position for a more convenient,
	// but much larger, representation.
	Pos struct{ token.Pos }
	Pos = token.Pos

	// Position describes an arbitrary source position including the
	// file, line, and column location. A Position is valid if the
	// line number is > 0.
	Position struct{ token.Position }
	Position = token.Position

	// Token is the set of lexical tokens of the fastpeg language.
	Token int
)

// NewFileSet creates a new file set.
// It delegates to the underlying token package's NewFileSet;
// since FileSet is a type alias (see the type block above), the
// returned value is used directly with no wrapping.
// NOTE(review): the wrapped package appears to be go/token — the
// import block is outside this view; confirm.
func NewFileSet() *FileSet {
	return token.NewFileSet()
}

// List of possible tokens.
const (
	Illegal Token = iota