@@ -22,7 +22,7 @@ const (
)
// ErrorList is a list of errors.
-type ErrorList struct{ scanner.ErrorList }
+type ErrorList = scanner.ErrorList
// A Scanner holds the scanner's internal state while processing a
// given input. It can be allocated as part of another data structure
@@ -52,6 +52,11 @@ func (s *Scanner) Init(file *token.File, src []byte) {
s.file = file
s.src = src
+ if s.errs == nil {
+ // TODO: probably needs to be passed by the parser
+ s.errs = new(ErrorList)
+ }
+
s.rn = 0
s.curpos = 0
s.nextpos = 0
@@ -64,6 +69,12 @@ func (s *Scanner) Init(file *token.File, src []byte) {
}
}
+// Err returns the error(s) registered in the scanner's error list,
+// if any, or nil.
+func (s *Scanner) Err() error {
+ return s.errs.Err()
+}
+
// advance the scanner to the next rn in the src.
// rn == -1 on EOF.
func (s *Scanner) advance() {
@@ -74,6 +85,7 @@ func (s *Scanner) advance() {
s.file.AddLine(s.linepos)
}
s.rn = eof
+ return
}
s.curpos = s.nextpos
@@ -108,13 +120,6 @@ func (s *Scanner) error(offset int, msg string) {
s.errs.Add(fpos, msg)
}
-func (s *Scanner) peek() byte {
- if s.nextpos < len(s.src) {
- return s.src[s.nextpos]
- }
- return 0
-}
-
func (s *Scanner) skipWhitespace() {
for s.rn == ' ' || s.rn == '\t' || s.rn == '\n' || s.rn == '\r' {
s.advance()
@@ -136,7 +141,7 @@ func (s *Scanner) skipWhitespace() {
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
s.skipWhitespace()
- pos = token.Pos{Pos: s.file.Pos(s.curpos)}
+ pos = s.file.Pos(s.curpos)
switch rn := s.rn; {
case isLetter(rn):
tok = token.Identifier
@@ -205,7 +210,7 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
default:
// s.advance already reports bom, ignore it here
if rn != bom {
- s.error(s.file.Offset(pos.Pos), fmt.Sprintf("illegal character %#U", rn))
+ s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", rn))
}
tok = token.Illegal
lit = string(rn)
@@ -0,0 +1,107 @@
+package scanner
+
+import (
+ "testing"
+
+ "git.sr.ht/~mna/fastpeg/internal/bootstrap/token"
+ "github.com/stretchr/testify/require"
+)
+
+type tuple struct {
+ tok token.Token
+ lit string
+}
+
+func TestScanner_Tokens(t *testing.T) {
+ input := `
+abc "" 'def'
+[] <- ←⟵=
+| / &
+&{a} ${ _1 } @{ _-_ }
+. : ; ! ? * + ()
+"'x'" '"y"' [z]
+`
+
+ output := []tuple{
+ {token.Identifier, "abc"},
+ {token.Literal, `""`},
+ {token.Literal, `'def'`},
+ {token.CharClass, `[]`},
+ {token.Arrow, `<-`},
+ {token.Arrow, `←`},
+ {token.Arrow, `⟵`},
+ {token.Arrow, `=`},
+ {token.Separator, `|`},
+ {token.Separator, `/`},
+ {token.Ampersand, ``},
+ {token.PredCoderef, `a`},
+ {token.StateCoderef, `_1`},
+ {token.ActionCoderef, `_-_`},
+ {token.Dot, ``},
+ {token.Colon, ``},
+ {token.Semicolon, ``},
+ {token.Exclamation, ``},
+ {token.Question, ``},
+ {token.Star, ``},
+ {token.Plus, ``},
+ {token.Lparen, ``},
+ {token.Rparen, ``},
+ {token.Literal, `"'x'"`},
+ {token.Literal, `'"y"'`},
+ {token.CharClass, `[z]`},
+ }
+
+ var s Scanner
+ result := scanAll(&s, input)
+ require.Equal(t, output, result)
+ require.NoError(t, s.Err())
+}
+
+func TestScanner_Escapes(t *testing.T) {
+ input := `
+"\a\b\f\n\r\t\v\\\""
+'\a\b\f\n\r\t\v\\\''
+"\x00\x10\xa1\xAF\xff"
+'\x00\x10\xA1\xaf\xff'
+[\x00\x10\xA1\xaf\xff]
+"\u0123\uabcd\uEFef\U00045678"
+'\u0123\uabcd\uEFef\U00045678'
+[\u0123\uabcd\uEFef\U00045678]
+[\a\b\f\n\r\t\v\\\]\[\-\^]
+[\pC\PL\p{X}\P{Latin}]
+`
+
+ output := []tuple{
+ {token.Literal, `"\a\b\f\n\r\t\v\\\""`},
+ {token.Literal, `'\a\b\f\n\r\t\v\\\''`},
+ {token.Literal, `"\x00\x10\xa1\xAF\xff"`},
+ {token.Literal, `'\x00\x10\xA1\xaf\xff'`},
+ {token.CharClass, `[\x00\x10\xA1\xaf\xff]`},
+ {token.Literal, `"\u0123\uabcd\uEFef\U00045678"`},
+ {token.Literal, `'\u0123\uabcd\uEFef\U00045678'`},
+ {token.CharClass, `[\u0123\uabcd\uEFef\U00045678]`},
+ {token.CharClass, `[\a\b\f\n\r\t\v\\\]\[\-\^]`},
+ {token.CharClass, `[\pC\PL\p{X}\P{Latin}]`},
+ }
+
+ var s Scanner
+ result := scanAll(&s, input)
+ require.Equal(t, output, result)
+ require.NoError(t, s.Err())
+}
+
+func scanAll(s *Scanner, input string) []tuple {
+ var result []tuple
+
+ fs := token.NewFileSet()
+ f := fs.AddFile("test", -1, len(input))
+ s.Init(f, []byte(input))
+ for {
+ _, tok, lit := s.Scan()
+ if tok == token.EOF {
+ break
+ }
+ result = append(result, tuple{tok, lit})
+ }
+ return result
+}
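
A minimal sketch, not part of the patch above: one way the new Err method could
be driven on malformed input, written in the same style as the tests. It assumes
a control byte falls through to the default case of Scan and is recorded as an
illegal-character error; the helper name scanIllegal and the file name "example"
are illustrative.

package scanner

import "git.sr.ht/~mna/fastpeg/internal/bootstrap/token"

// scanIllegal runs the scanner over input containing a control byte and
// returns whatever the scanner accumulated in its error list (nil when
// the input is clean).
func scanIllegal() error {
	input := "abc \x01 def" // \x01 is neither whitespace nor a valid token

	fs := token.NewFileSet()
	f := fs.AddFile("example", -1, len(input))

	var s Scanner
	s.Init(f, []byte(input))
	for {
		_, tok, _ := s.Scan()
		// Stop at EOF, or at the first Illegal token so the loop cannot
		// spin if the scanner does not advance past the offending rune.
		if tok == token.EOF || tok == token.Illegal {
			break
		}
	}
	return s.Err() // non-nil here: the illegal character was reported
}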