From 3a53f16966e476d19265aa66feacfb229b58ee02 Mon Sep 17 00:00:00 2001 From: Martin Angers Date: Tue, 7 May 2019 13:52:47 -0400 Subject: [PATCH] internal/bootstrap/scanner: more scanner tests --- internal/bootstrap/scanner/scanner.go | 26 +- internal/bootstrap/scanner/scanner_test.go | 295 +++++++++++++++++++++ 2 files changed, 318 insertions(+), 3 deletions(-) diff --git a/internal/bootstrap/scanner/scanner.go b/internal/bootstrap/scanner/scanner.go index f81e446..aea7cdd 100644 --- a/internal/bootstrap/scanner/scanner.go +++ b/internal/bootstrap/scanner/scanner.go @@ -8,6 +8,7 @@ package scanner import ( "fmt" "go/scanner" + "io" "unicode" "unicode/utf8" @@ -24,6 +25,13 @@ const ( // ErrorList is a list of errors. type ErrorList = scanner.ErrorList +// PrintError is a utility function that prints a list of errors +// to w, one error per line, if the err parameter is an ErrorList. +// Otherwise it prints the err string. +func PrintError(w io.Writer, err error) { + scanner.PrintError(w, err) +} + // A Scanner holds the scanner's internal state while processing a // given input. It can be allocated as part of another data structure // but must be initialized via Init before use. @@ -164,7 +172,11 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { case '<': if s.rn != '-' { - s.error(s.curpos-1, fmt.Sprintf("incomplete arrow symbol: illegal character %#U", s.rn)) + msg := fmt.Sprintf("incomplete arrow symbol: illegal character %#U", s.rn) + if s.rn == eof { + msg = "arrow symbol not terminated" + } + s.error(s.curpos-1, msg) break } s.advance() @@ -190,7 +202,11 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { case '$': if s.rn != '{' { - s.error(s.curpos-1, fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn)) + msg := fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn) + if s.rn == eof { + msg = "state coderef not terminated" + } + s.error(s.curpos-1, msg) break } tok = token.StateCoderef @@ -198,7 +214,11 @@ func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { case '@': if s.rn != '{' { - s.error(s.curpos-1, fmt.Sprintf("invalid state coderef: illegal character %#U", s.rn)) + msg := fmt.Sprintf("invalid action coderef: illegal character %#U", s.rn) + if s.rn == eof { + msg = "action coderef not terminated" + } + s.error(s.curpos-1, msg) break } tok = token.ActionCoderef diff --git a/internal/bootstrap/scanner/scanner_test.go b/internal/bootstrap/scanner/scanner_test.go index b7d2737..034891b 100644 --- a/internal/bootstrap/scanner/scanner_test.go +++ b/internal/bootstrap/scanner/scanner_test.go @@ -1,7 +1,11 @@ package scanner import ( + "fmt" + "os" + "strings" "testing" + "unicode/utf8" "git.sr.ht/~mna/fastpeg/internal/bootstrap/token" "github.com/stretchr/testify/require" @@ -90,6 +94,297 @@ func TestScanner_Escapes(t *testing.T) { require.NoError(t, s.Err()) } +func TestScanner_Errors(t *testing.T) { + cases := []struct { + input string + output []tuple + errMsg string + }{ + {"", nil, ""}, + { + string(rune(bom)) + "a", + []tuple{{token.Identifier, "a"}}, + "", + }, + { + "a" + string(rune(bom)), + []tuple{ + {token.Identifier, "a"}, + {token.Illegal, "\ufeff"}, + }, + "illegal byte-order mark", + }, + { + "\x00", + []tuple{{token.Illegal, "\x00"}}, + "illegal character NUL", + }, + { + "\xff", + []tuple{{token.Illegal, string(utf8.RuneError)}}, + "illegal UTF-8 encoding", + }, + { + "