~jojo/Carth

039652e6355e2b70981a4912f4ff0cf87d90fbbc — JoJo 1 year, 1 month ago 66b6097
Make parser act on token trees instead of chars
M app/Main.hs => app/Main.hs +10 -11
@@ 4,6 4,7 @@ module Main (main) where

import System.Environment
import Control.Monad
import Control.Monad.Except
import Prelude hiding (lex)

import Misc


@@ 56,10 57,10 @@ frontend :: Config cfg => cfg -> FilePath -> IO Ast.Program
frontend cfg f = do
    let d = getDebug cfg
    verbose cfg ("   Lexing")
    cst <- lex f
    when d $ writeFile ".dbg.lexd" (show cst)
    tts <- lex f
    when d $ writeFile ".dbg.lexd" (show tts)
    verbose cfg ("   Parsing")
    ast <- parse f
    ast <- parse f tts
    when d $ writeFile ".dbg.parsed" (pretty ast)
    verbose cfg ("   Typechecking")
    ann <- typecheck' f ast


@@ 72,18 73,16 @@ frontend cfg f = do
    pure opt

lex :: FilePath -> IO [Lexd.TokenTree]
lex f = Lex.lex f >>= \case
    Left e -> putStrLn (formatParseErr e) >> abort f
lex f = runExceptT (Lex.lex f) >>= \case
    Left e -> putStrLn (formatLexErr e) >> abort f
    Right p -> pure p
  where
    formatParseErr e = let ss = lines e in (unlines ((head ss ++ " Error:") : tail ss))
    formatLexErr e = let ss = lines e in (unlines ((head ss ++ " Error:") : tail ss))

parse :: FilePath -> IO Parsed.Program
parse f = Parse.parse f >>= \case
    Left e -> putStrLn (formatParseErr e) >> abort f
parse :: FilePath -> [Lexd.TokenTree] -> IO Parsed.Program
parse f tts = case runExcept (Parse.parse tts) of
    Left e -> Err.printParseErr e >> abort f
    Right p -> pure p
  where
    formatParseErr e = let ss = lines e in (unlines ((head ss ++ " Error:") : tail ss))

typecheck' :: FilePath -> Parsed.Program -> IO Checked.Program
typecheck' f p = case typecheck p of

M carth.cabal => carth.cabal +11 -0
@@ 41,6 41,7 @@ library
      Lex
      Lexd
      Parse
      Parser
      Parsed
      Pretty
      Selections


@@ 69,6 70,7 @@ library
    , template-haskell
    , utf8-string
    , prettyprinter
    , parser-combinators
  default-language: Haskell2010

executable carth


@@ 94,6 96,14 @@ executable carth
    , process
    , template-haskell
    , utf8-string
    , parser-combinators

    -- Testing dependencies. I have them here to not require rebuild between `stack build`
    -- & `stack test`.
    , QuickCheck
    , hspec
    , hspec-discover
    , silently
  default-language: Haskell2010

test-suite carth-test


@@ 119,6 129,7 @@ test-suite carth-test
    , process
    , template-haskell
    , utf8-string
    , parser-combinators

    , QuickCheck
    , hspec

M src/Err.hs => src/Err.hs +3 -0
@@ 16,6 16,9 @@ import Gen

type Message = String

printParseErr :: (SrcPos, String) -> IO ()
printParseErr (p, msg) = posd p msg

printTypeErr :: TypeErr -> IO ()
printTypeErr = \case
    MainNotDefined -> putStrLn "Error: main not defined"

M src/Lex.hs => src/Lex.hs +32 -29
@@ 1,6 1,6 @@
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds #-}

module Lex where
module Lex (lex, toplevel, tokentree) where

import Control.Monad
import Control.Monad.Except


@@ 16,6 16,8 @@ import Data.Set (Set)
import qualified Data.Set as Set
import System.FilePath
import System.Directory
import Data.Void
import Prelude hiding (lex)

import Misc
import SrcPos


@@ 24,18 26,18 @@ import Literate
import EnvVars


type Import = String
type Lexer = Parsec Void String

-- data Macro = Macro String deriving (Show)
type Import = String

data TopLevel = TImport Import -- | TMacro Macro
    | TTokenTree TokenTree


lex :: FilePath -> IO (Either String [TokenTree])
lex :: FilePath -> ExceptT String IO [TokenTree]
lex filepath = do
    modPaths <- modulePaths
    runExceptT (lexModules modPaths Set.empty [filepath])
    modPaths <- lift modulePaths
    lexModules modPaths Set.empty [filepath]

lexModules :: [FilePath] -> Set String -> [String] -> ExceptT String IO [TokenTree]
lexModules modPaths visiteds = \case


@@ 59,7 61,7 @@ lexModules modPaths visiteds = \case
        ttsNexts <- lexModules modPaths (Set.insert f visiteds) (impFs ++ nexts)
        pure (tts ++ ttsNexts)

toplevels :: Parser ([Import], [TokenTree])
toplevels :: Lexer ([Import], [TokenTree])
toplevels = do
    space
    tops <- many toplevel


@@ 72,12 74,12 @@ toplevels = do
        ([], [])
        tops

toplevel :: Parser TopLevel
toplevel :: Lexer TopLevel
toplevel = getSrcPos >>= \p -> parens
    (fmap TImport import' <|> fmap (TTokenTree . WithPos p . Parens) (many tokentree))
    where import' = keyword' "import" *> small

tokentree :: Parser TokenTree
tokentree :: Lexer TokenTree
tokentree = withPos tokentree'
  where
    tokentree' = choice


@@ 99,17 101,17 @@ tokentree = withPos tokentree'
        pure $ either ((\n -> Int (if neg then -n else n)) . fromIntegral)
                      (\x -> F64 (if neg then -x else x))
                      a
    -- ns_int = option id (char '-' $> negate) >>= \f -> fmap (f . fromIntegral) ns_nat
    ns_nat :: Parser Word
    ns_nat :: Lexer Word
    ns_nat = choice
        [string "0b" *> Lexer.binary, string "0x" *> Lexer.hexadecimal, Lexer.decimal]

strlit :: Parser String
strlit :: Lexer String
strlit = andSkipSpaceAfter ns_strlit
    where ns_strlit = char '"' >> manyTill Lexer.charLiteral (char '"')

keyword :: Parser Keyword
keyword = andSkipSpaceAfter $ choice
keyword :: Lexer Keyword
keyword = andSkipSpaceAfter $ choice $ map
    (\p -> try (p <* notFollowedBy identLetter))
    [ string ":" $> Kcolon
    , string "." $> Kdot
    , string "Fun" $> KFun


@@ 133,27 135,28 @@ keyword = andSkipSpaceAfter $ choice
    , string "Id@" $> KIdAt
    ]

keyword' :: String -> Parser ()
keyword' :: String -> Lexer ()
keyword' x = andSkipSpaceAfter $ label ("keyword " ++ x) (string x) $> ()

small, smallSpecial, smallNormal :: Parser String
small, smallSpecial, smallNormal :: Lexer String
small = smallSpecial <|> smallNormal
smallSpecial = keyword' "id@" *> strlit
smallNormal = andSkipSpaceAfter $ liftA2 (:) smallStart identRest
  where
    smallStart = lowerChar <|> otherChar <|> try (oneOf "-+" <* notFollowedBy digitChar)

big, bigSpecial, bigNormal :: Parser String
big = bigSpecial <|> bigNormal
bigSpecial, bigNormal :: Lexer String
bigSpecial = keyword' "id@" *> strlit
bigNormal = andSkipSpaceAfter $ liftA2 (:) bigStart identRest
    where bigStart = upperChar <|> char ':'

identRest :: Parser String
identRest :: Lexer String
identRest = many identLetter
    where identLetter = letterChar <|> otherChar <|> oneOf "-+:" <|> digitChar

otherChar :: Parser Char
identLetter :: Lexer Char
identLetter = letterChar <|> otherChar <|> oneOf "-+:" <|> digitChar

otherChar :: Lexer Char
otherChar = satisfy
    (\c -> and
        [ any ($ c) [isMark, isPunctuation, isSymbol]


@@ 162,32 165,32 @@ otherChar = satisfy
        ]
    )

parens, ns_parens :: Parser a -> Parser a
parens, ns_parens :: Lexer a -> Lexer a
parens = andSkipSpaceAfter . ns_parens
ns_parens = between (symbol "(") (string ")")

brackets, ns_brackets :: Parser a -> Parser a
brackets, ns_brackets :: Lexer a -> Lexer a
brackets = andSkipSpaceAfter . ns_brackets
ns_brackets = between (symbol "[") (string "]")

braces, ns_braces :: Parser a -> Parser a
braces, ns_braces :: Lexer a -> Lexer a
braces = andSkipSpaceAfter . ns_braces
ns_braces = between (symbol "{") (string "}")

andSkipSpaceAfter :: Parser a -> Parser a
andSkipSpaceAfter :: Lexer a -> Lexer a
andSkipSpaceAfter = Lexer.lexeme space

symbol :: String -> Parser String
symbol :: String -> Lexer String
symbol = Lexer.symbol space

-- | Spaces, line comments, and block comments
space :: Parser ()
space :: Lexer ()
space = Lexer.space Char.space1 (Lexer.skipLineComment ";") empty

withPos :: Parser a -> Parser (WithPos a)
withPos :: Lexer a -> Lexer (WithPos a)
withPos = liftA2 WithPos getSrcPos

getSrcPos :: Parser SrcPos
getSrcPos :: Lexer SrcPos
getSrcPos = fmap
    (\(SourcePos f l c) -> SrcPos f (fromIntegral (unPos l)) (fromIntegral (unPos c)))
    getSourcePos

M src/Lexd.hs => src/Lexd.hs +1 -1
@@ 22,7 22,7 @@ data Keyword
    | Klet1 | Klet | Kletrec
    | Ksizeof
    | KidAt | KIdAt
    deriving (Show)
    deriving (Eq, Show)

data TokenTree'
    = Lit Const

M src/Misc.hs => src/Misc.hs +18 -2
@@ 22,7 22,6 @@ import Data.Void
newtype TopologicalOrder a = Topo [a]
    deriving Show

type Parser = Parsec Void String
type Source = String




@@ 92,7 91,7 @@ splitOn sep = fromMaybe [] . Mega.parseMaybe splitOn'
        a <- many anySingle
        pure $ (as ++) $ if not (null a) then [a] else []

parse' :: Parser a -> FilePath -> Source -> Either String a
parse' :: Parsec Void String a -> FilePath -> Source -> Either String a
parse' p name src = first errorBundlePretty (Mega.parse p name src)

partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])


@@ 105,3 104,20 @@ partitionWith f = foldr

rightToMaybe :: Either a b -> Maybe b
rightToMaybe = either (const Nothing) Just

unsnoc :: [a] -> Maybe ([a], a)
unsnoc = \case
    [] -> Nothing
    x : xs -> case unsnoc xs of
        Just (ys, y) -> Just (x : ys, y)
        Nothing -> Just ([], x)

is2tup :: [a] -> Maybe (a, a)
is2tup = \case
    a1 : [a2] -> Just (a1, a2)
    _ -> Nothing

is3tup :: [a] -> Maybe (a, a, a)
is3tup = \case
    a1 : a2 : [a3] -> Just (a1, a2, a3)
    _ -> Nothing

M src/Parse.hs => src/Parse.hs +106 -256
@@ 7,111 7,46 @@
--       If a parser has a variant with a "ns_" prefix, that variant does not
--       consume succeding space, while the unprefixed variant does.

module Parse (Parser, Source, parse, parse', toplevels) where
module Parse (parse) where

import Control.Applicative hiding (many, some)
import Control.Monad
import Data.Char (isMark, isPunctuation, isSymbol, isUpper)
import Data.Functor
import Control.Monad.State
import Control.Monad.Except
import Control.Monad.Combinators
import Data.Maybe
import Control.Applicative (liftA2)
import Text.Megaparsec hiding (parse, match)
import Text.Megaparsec.Char hiding (space, space1)
import qualified Text.Megaparsec.Char as Char
import qualified Text.Megaparsec.Char.Lexer as Lexer
import Data.Set (Set)
import qualified Data.Set as Set
import Data.List
import System.FilePath
import System.Directory
import qualified Data.List.NonEmpty as NonEmpty
import Text.Read (readMaybe)

import Misc
import SrcPos
import Parsed
import Literate
import EnvVars


type Import = String


parse :: FilePath -> IO (Either String Program)
parse filepath = do
    let (dir, file) = splitFileName filepath
    let moduleName = dropExtension file
    r <- parseModule filepath dir moduleName Set.empty []
    pure (fmap (\(ds, ts, es) -> Program ds ts es) r)

parseModule
    :: FilePath
    -> FilePath
    -> String
    -> Set String
    -> [String]
    -> IO (Either String ([Def], [TypeDef], [Extern]))
parseModule filepath dir m visiteds nexts =
    let readModuleIn modPaths = do
            let fs = do
                    p <- modPaths
                    let pm = p </> m
                    fmap (addExtension pm) [".carth", ".org"]
            fs' <- filterM doesFileExist fs
            f <- case listToMaybe fs' of
                Nothing -> do
                    putStrLn
                        $ ("Error: No file for module " ++ m)
                        ++ (" exists.\nSearched paths: " ++ show modPaths)
                    abort filepath
                Just f' -> pure f'
            s <- readFile f
            s' <- if takeExtension f == ".org"
                then do
                    let s' = untangleOrg s
                    writeFile (addExtension m "untangled") s'
                    pure s'
                else pure s
            pure (s', f)

        advance (is, ds, ts, es) = case (is ++ nexts) of
            [] -> pure (Right (ds, ts, es))
            next : nexts' -> fmap
                (fmap (\(ds', ts', es') -> (ds ++ ds', ts ++ ts', es ++ es')))
                (parseModule filepath dir next (Set.insert m visiteds) nexts')
    in  if Set.member m visiteds
            then advance ([], [], [], [])
            else do
             -- TODO: make dir absolute to make debug work when binary is moved?
                modPaths <- fmap (dir :) modulePaths
                (src, f) <- readModuleIn modPaths
                case parse' toplevels f src of
                    Left e -> pure (Left e)
                    Right r -> advance r

toplevels :: Parser ([Import], [Def], [TypeDef], [Extern])
toplevels = do
    space
    r <- option ([], [], [], []) (toplevel >>= flip fmap toplevels)
    eof
    pure r
import Lexd hiding (Big, Small)
import qualified Lexd
import Parser
import Parsed hiding (Lit)
import qualified Parsed
import Pretty


parse :: [TokenTree] -> Except (SrcPos, String) Program
parse tts = fmap (\(ds, ts, es) -> Program ds ts es) (runParser toplevels tts)

toplevels :: Parser ([Def], [TypeDef], [Extern])
toplevels = fmap mconcat (manyTill toplevel end)
  where
    toplevel = do
        topPos <- getSrcPos
        parens $ choice
            [ fmap (\i (is, ds, ts, es) -> (i : is, ds, ts, es)) import'
            , fmap (\d (is, ds, ts, es) -> (is, d : ds, ts, es)) (def topPos)
            , fmap (\t (is, ds, ts, es) -> (is, ds, t : ts, es)) typedef
            , fmap (\e (is, ds, ts, es) -> (is, ds, ts, e : es)) extern
            ]

import' :: Parser Import
import' = reserved "import" *> fmap idstr small
    toplevel = parens' $ \topPos -> choice
        [ fmap (\d -> ([d], [], [])) (def topPos)
        , fmap (\t -> ([], [t], [])) typedef
        , fmap (\e -> ([], [], [e])) extern
        ]

extern :: Parser Extern
extern = reserved "extern" *> liftA2 Extern small type_
extern = reserved Kextern *> liftA2 Extern small type_

typedef :: Parser TypeDef
typedef = do
    _ <- reserved "data"
    _ <- reserved Kdata
    let onlyName = fmap (, []) big
    let nameAndSome = parens . liftA2 (,) big . some
    (name, params) <- onlyName <|> nameAndSome small


@@ 122,10 57,10 @@ def :: SrcPos -> Parser Def
def topPos = defUntyped topPos <|> defTyped topPos

defUntyped :: SrcPos -> Parser Def
defUntyped pos = reserved "define" *> def' (pure Nothing) pos
defUntyped pos = reserved Kdefine *> def' (pure Nothing) pos

defTyped :: SrcPos -> Parser Def
defTyped pos = reserved "define:" *> def' (fmap Just scheme) pos
defTyped pos = reserved KdefineColon *> def' (fmap Just scheme) pos

def'
    :: Parser (Maybe Scheme)


@@ 133,10 68,10 @@ def'
    -> Parser (Id 'Small, (WithPos (Maybe Scheme, Expr)))
def' schemeParser topPos = varDef <|> funDef
  where
    parenDef = try (getSrcPos >>= (parens . def))
    body = withPos $ do
    parenDef = try (parens' def)
    body = do
        ds <- many parenDef
        if null ds then expr' else fmap (LetRec ds) expr
        if null ds then expr else fmap (\b -> WithPos (getPos b) (LetRec ds b)) expr
    varDef = do
        name <- small
        scm <- schemeParser


@@ 158,9 93,11 @@ data BindingLhs
    | CaseVarLhs Pat

expr' :: Parser Expr'
expr' = choice [var, estr, num, eConstructor, etuple, pexpr]
expr' = choice [var, lit, eConstructor, etuple, pexpr]
  where
    estr = fmap (Lit . Str) strlit
    lit = token "constant literal" $ const $ \case
        Lit c -> Just (Parsed.Lit c)
        _ -> Nothing
    eConstructor = fmap Ctor big
    -- FIXME: These positions are completely wack. Gotta get a separate variant in the AST
    --        for pairs. Similar to Box.


@@ 179,23 116,23 @@ expr' = choice [var, estr, num, eConstructor, etuple, pexpr]
                              r
                          )
    var = fmap Var small
    pexpr = getSrcPos >>= \p -> parens $ choice
    pexpr = parens' $ \p -> choice
        [funMatch, match, if', fun, let1 p, let', letrec, typeAscr, sizeof, app]
    funMatch = reserved "fmatch" *> fmap FunMatch cases
    match = reserved "match" *> liftA2 Match expr cases
    cases = many (parens (reserved "case" *> (liftA2 (,) pat expr)))
    if' = reserved "if" *> liftM3 If expr expr expr
    funMatch = reserved Kfmatch *> fmap FunMatch cases
    match = reserved Kmatch *> liftA2 Match expr cases
    cases = many (parens (reserved Kcase *> (liftA2 (,) pat expr)))
    if' = reserved Kif *> liftM3 If expr expr expr
    fun = do
        reserved "fun"
        reserved Kfun
        params <- parens (some pat)
        body <- expr
        pure $ unpos $ foldr (\p b -> WithPos (getPos p) (FunMatch [(p, b)])) body params
    let1 p = reserved "let1" *> (varLhs <|> try funLhs <|> caseVarLhs) >>= \case
    let1 p = reserved Klet1 *> (varLhs <|> try funLhs <|> caseVarLhs) >>= \case
        VarLhs lhs -> liftA2 Let1 (varBinding p lhs) expr
        FunLhs name params -> liftA2 Let1 (funBinding p name params) expr
        CaseVarLhs lhs -> liftA2 Match expr (fmap (pure . (lhs, )) expr)
    let' = do
        reserved "let"
        reserved Klet
        bs <- parens (many pbinding)
        e <- expr
        pure $ unpos $ foldr


@@ 206,14 143,14 @@ expr' = choice [var, estr, num, eConstructor, etuple, pexpr]
            e
            bs
      where
        pbinding = getSrcPos >>= parens . binding
        pbinding = parens' binding
        binding p = (varLhs <|> try funLhs <|> caseVarLhs) >>= \case
            VarLhs lhs -> fmap Left (varBinding p lhs)
            FunLhs name params -> fmap Left (funBinding p name params)
            CaseVarLhs lhs -> fmap (Right . (lhs, )) expr
    letrec = reserved "letrec" *> liftA2 LetRec (parens (many pbinding)) expr
    letrec = reserved Kletrec *> liftA2 LetRec (parens (many pbinding)) expr
      where
        pbinding = getSrcPos >>= parens . binding
        pbinding = parens' binding
        binding p = (varLhs <|> funLhs) >>= \case
            VarLhs lhs -> varBinding p lhs
            FunLhs name params -> funBinding p name params


@@ 228,59 165,36 @@ expr' = choice [var, estr, num, eConstructor, etuple, pexpr]
        b <- expr
        let f = foldr (WithPos pos . FunMatch . pure .* (,)) b params
        pure (name, WithPos pos (Nothing, f))
    typeAscr = reserved ":" *> liftA2 TypeAscr expr type_
    sizeof = reserved "sizeof" *> fmap Sizeof type_
    typeAscr = reserved Kcolon *> liftA2 TypeAscr expr type_
    sizeof = reserved Ksizeof *> fmap Sizeof type_
    app = do
        rator <- expr
        rands <- some expr
        pure (unpos (foldl' (WithPos (getPos rator) .* App) rator rands))

num :: Parser Expr'
num = andSkipSpaceAfter ns_num

ns_num :: Parser Expr'
ns_num = do
    neg <- option False (char '-' $> True)
    a <- eitherP (try (ns_nat <* notFollowedBy (char '.'))) Lexer.float
    let e = either ((\n -> Int (if neg then -n else n)) . fromIntegral)
                   (\x -> F64 (if neg then -x else x))
                   a
    pure (Lit e)

int :: Parser Int
int = andSkipSpaceAfter $ option id (char '-' $> negate) >>= \f ->
    fmap (f . fromIntegral) ns_nat

ns_nat :: Parser Word
ns_nat =
    choice [string "0b" *> Lexer.binary, string "0x" *> Lexer.hexadecimal, Lexer.decimal]

strlit :: Parser String
strlit = andSkipSpaceAfter ns_strlit

ns_strlit :: Parser String
ns_strlit = char '"' >> manyTill Lexer.charLiteral (char '"')

pat :: Parser Pat
pat = choice [patInt, patStr, patCtor, patVar, patTuple, ppat]
  where
    patInt = liftA2 PInt getSrcPos int
    patInt = token "integer literal" $ \p -> \case
        Lit (Int x) -> Just (PInt p x)
        _ -> Nothing
    patStr = liftA2 PStr getSrcPos strlit
    strlit = token "string literal" $ const $ \case
        Lit (Str s) -> Just s
        _ -> Nothing
    patCtor = fmap (\x -> PConstruction (getPos x) x []) big
    patVar = fmap PVar small
    patTuple = tuple pat (\p -> PConstruction p (Id (WithPos p "Unit")) [])
        $ \l r -> let p = getPos l in PConstruction p (Id (WithPos p "Cons")) [l, r]
    ppat = do
        pos <- getSrcPos
        parens (choice [patBox pos, patCtion pos])
    patBox pos = reserved "Box" *> fmap (PBox pos) pat
    ppat = parens' $ \pos -> (choice [patBox pos, patCtion pos])
    patBox pos = reserved KBox *> fmap (PBox pos) pat
    patCtion pos = liftM3 PConstruction (pure pos) big (some pat)

scheme :: Parser Scheme
scheme = do
    pos <- getSrcPos
    let wrap = fmap (Forall pos Set.empty)
        universal = reserved "forall" *> liftA2 (Forall pos) tvars type_
        universal = reserved Kforall *> liftA2 (Forall pos) tvars type_
        tvars = parens (fmap Set.fromList (many tvar))
    wrap nonptype <|> (parens (universal <|> wrap ptype))



@@ 291,149 205,85 @@ nonptype :: Parser Type
nonptype = choice
    [fmap TPrim tprim, fmap TVar tvar, fmap (TConst . (, []) . idstr) big, ttuple]
  where
    tprim = try $ andSkipSpaceAfter
        (choice
                [ string "Nat" *> (fmap TNat Lexer.decimal <|> pure TNatSize)
                , string "Int" *> (fmap TInt Lexer.decimal <|> pure TIntSize)
                , string "F16" $> TF16
                , string "F32" $> TF32
                , string "F64" $> TF64
                , string "F128" $> TF128
                ]
        <* notFollowedBy identLetter
        )
    tprim = token "primitive type" $ const $ \case
        Lexd.Big ('N' : 'a' : 't' : s) | isWord s -> Just (TNat (read s))
        Lexd.Big ('I' : 'n' : 't' : s) | isWord s -> Just (TInt (read s))
        Lexd.Big "Nat" -> Just TNatSize
        Lexd.Big "Int" -> Just TIntSize
        Lexd.Big "F16" -> Just TF16
        Lexd.Big "F32" -> Just TF32
        Lexd.Big "F64" -> Just TF64
        Lexd.Big "F128" -> Just TF128
        _ -> Nothing
    ttuple = tuple type_ (const (TConst ("Unit", []))) $ \l r -> TConst ("Cons", [l, r])

-- | FIXME: Positions in here are kind of bad
tuple :: Parser a -> (SrcPos -> a) -> (a -> a -> a) -> Parser a
tuple p unit f = brackets $ do
    a <- p
    as <- many (try p)
    let ls = a : as
    pos <- getSrcPos
    r <- option (unit pos) (try (reserved "." *> p))
    pos <- gets stOuterPos
    r <- option (unit pos) (try (reserved Kdot *> p))
    pure $ foldr f r ls

ptype :: Parser Type
ptype = choice [tfun, tbox, tapp]
  where
    tfun = do
        reserved "Fun"
        reserved KFun
        t <- type_
        ts <- some type_
        pure (foldr1 TFun (t : ts))
    tbox = reserved "Box" *> fmap TBox type_
    tbox = reserved KBox *> fmap TBox type_
    tapp = liftA2 (TConst .* (,) . idstr) big (some type_)

tvar :: Parser TVar
tvar = fmap TVExplicit small

parens :: Parser a -> Parser a
parens = andSkipSpaceAfter . ns_parens
parens ma = parens' (const ma)

ns_parens :: Parser a -> Parser a
ns_parens = between (symbol "(") (string ")")
parens' :: (SrcPos -> Parser a) -> Parser a
parens' = sexpr "`(`" $ \case
    Parens tts -> Just tts
    _ -> Nothing

brackets :: Parser a -> Parser a
brackets = andSkipSpaceAfter . ns_brackets

ns_brackets :: Parser a -> Parser a
ns_brackets = between (symbol "[") (string "]")
brackets ma = brackets' (const ma)

brackets' :: (SrcPos -> Parser a) -> Parser a
brackets' = sexpr "`[`" $ \case
    Brackets tts -> Just tts
    _ -> Nothing

sexpr :: String -> (TokenTree' -> Maybe [TokenTree]) -> (SrcPos -> Parser a) -> Parser a
sexpr expected extract f = do
    (p, tts) <- token expected $ \p' -> fmap (p', ) . extract
    St _ pOld ttsOld <- get
    modify (\st -> st { stOuterPos = p, stInput = tts })
    a <- f p
    end
    modify (\st -> st { stOuterPos = pOld, stInput = ttsOld })
    pure a

big :: Parser (Id 'Big)
big = fmap Id (special <|> normal)
  where
    special = reserved "Id@" *> withPos strlit
    normal = withPos $ try $ do
        s <- identifier
        let c = head s
        if (isUpper c || [c] == ":")
            then pure s
            else fail "Big identifier must start with an uppercase letter or colon."
big = token "big identifier" $ \p -> \case
    Lexd.Big x -> Just (Id (WithPos p x))
    _ -> Nothing

small :: Parser (Id 'Small)
small = fmap Id (special <|> normal)
  where
    special = reserved "id@" *> withPos strlit
    normal = withPos $ try $ do
        s <- identifier
        let c = head s
        if (isUpper c || [c] == ":")
            then fail "Small identifier must not start with an uppercase letter or colon."
            else pure s

identifier :: Parser String
identifier = do
    name <- ident
    if elem name reserveds
        then unexpected (Label (NonEmpty.fromList ("reserved word " ++ show name)))
        else pure name

ident :: Parser String
ident = andSkipSpaceAfter ns_ident

ns_ident :: Parser String
ns_ident = label "identifier" $ liftA2 (:) identStart (many identLetter)
  where
    identStart =
        choice [letterChar, otherChar, try (oneOf "-+" <* notFollowedBy digitChar)]

identLetter :: Parser Char
identLetter = letterChar <|> otherChar <|> oneOf "-+" <|> digitChar

reserved :: String -> Parser ()
reserved x = do
    if not (elem x reserveds)
        then ice ("`" ++ x ++ "` is not a reserved word")
        else label ("reserved word " ++ x) (try (mfilter (== x) ident)) $> ()

andSkipSpaceAfter :: Parser a -> Parser a
andSkipSpaceAfter = Lexer.lexeme space

symbol :: String -> Parser String
symbol = Lexer.symbol space

reserveds :: [String]
reserveds =
    [ ":"
    , "."
    , "Fun"
    , "Box"
    , "define"
    , "define:"
    , "extern"
    , "forall"
    , "fmatch"
    , "match"
    , "if"
    , "fun"
    , "let1"
    , "let"
    , "letrec"
    , "data"
    , "sizeof"
    , "import"
    , "case"
    , "id@"
    , "Id@"
    ]

otherChar :: Parser Char
otherChar = satisfy
    (\c -> and
        [ any ($ c) [isMark, isPunctuation, isSymbol]
        , not (elem c "()[]{}")
        , not (elem c "\"-+")
        ]
    )
small = token "small identifier" $ \p -> \case
    Lexd.Small x -> Just (Id (WithPos p x))
    _ -> Nothing

-- | Spaces, line comments, and block comments
space :: Parser ()
space = Lexer.space Char.space1 (Lexer.skipLineComment ";") empty
reserved :: Keyword -> Parser ()
reserved k = token ("keyword " ++ pretty k) $ const $ \case
    Keyword k' | k == k' -> Just ()
    _ -> Nothing

isWord :: String -> Bool
isWord s = isJust ((readMaybe s) :: Maybe Word)

withPos :: Parser a -> Parser (WithPos a)
withPos = liftA2 WithPos getSrcPos

getSrcPos :: Parser SrcPos
getSrcPos = fmap
    (\(SourcePos f l c) -> SrcPos f (fromIntegral (unPos l)) (fromIntegral (unPos c)))
    getSourcePos

M src/Parsed.hs => src/Parsed.hs +4 -8
@@ 2,17 2,18 @@
           , MultiParamTypeClasses, KindSignatures, DataKinds
           , DeriveDataTypeable #-}

module Parsed (module Parsed, TPrim(..), TConst) where
module Parsed (module Parsed, Const (..), TPrim(..), TConst) where

import qualified Data.Set as Set
import Data.Set (Set)
import Data.Bifunctor
import Control.Arrow ((>>>))

import Data.Data

import SrcPos
import FreeVars
import TypeAst
import Lexd (Const (..))


data IdCase = Big | Small


@@ 32,6 33,7 @@ data Type
    = TVar TVar
    | TPrim TPrim
    | TConst (TConst Type)
    -- TODO: Remove special case for these two? Is it really needed?
    | TFun Type Type
    | TBox Type
    deriving (Show, Eq, Ord, Data)


@@ 47,12 49,6 @@ data Pat
    | PBox SrcPos Pat
    deriving Show

data Const
    = Int Int
    | F64 Double
    | Str String
    deriving (Show, Eq)

data Expr'
    = Lit Const
    | Var (Id 'Small)

A src/Parser.hs => src/Parser.hs +94 -0
@@ 0,0 1,94 @@
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds
           , GeneralizedNewtypeDeriving #-}

module Parser (Parser, St (..), runParser, token, end, lookAhead, try, getSrcPos) where

import Control.Applicative hiding (many, some)
import Control.Monad
import Control.Monad.State
import Control.Monad.Except
import Data.Functor
import Data.List
import Data.Set (Set)
import qualified Data.Set as Set

import Misc
import SrcPos
import Lexd

data Err = Err { errLength :: Word, errPos :: SrcPos, errExpecteds :: Set String }
    deriving (Show, Eq)

-- TODO: Semigroup instance for the error type should select the one with the longest
--       match. We need to keep track of how long the match is.
--
--       If two matches are of the same / no length, combine the sets of "expected"s of
--       both. So it's like "expected keyword extern or keyword data".
instance Semigroup Err where
    (<>) e1 e2
        | e2 == mempty = e1
        | errLength e1 > errLength e2 = e1
        | errLength e1 < errLength e2 = e2
        | otherwise = Err (errLength e2)
                          (errPos e2)
                          (Set.union (errExpecteds e1) (errExpecteds e2))

instance Monoid Err where
    mempty = Err 0 (SrcPos "<dummy>" 0 0) Set.empty

data St = St { stCount :: Word, stOuterPos :: SrcPos, stInput :: [TokenTree] }

newtype Parser a = Parser (StateT St (Except Err) a)
    deriving (Functor, Applicative, MonadPlus, Monad, MonadError Err, MonadState St)

instance Alternative Parser where
    empty = Parser (throwError mempty)
    (<|>) ma mb = do
        n <- gets stCount
        catchError ma $ \e -> if errLength e > n
            then throwError e
            else catchError mb (throwError . (e <>))

runParser :: Parser a -> [TokenTree] -> Except (SrcPos, String) a
runParser (Parser ma) tts =
    let noPos = ice "read SrcPos in parser state at top level"
        initSt = St 0 noPos tts
        formatExpecteds es = case Set.toList es of
            [] -> ice "empty list of expecteds in formatExpecteds"
            [e] -> "Expected " ++ e
            es' -> "Expected one of: " ++ intercalate ", " es'
    in  withExcept (\(Err _ pos es) -> (pos, formatExpecteds es)) (evalStateT ma initSt)

token :: String -> (SrcPos -> TokenTree' -> Maybe a) -> Parser a
token exp f = do
    St n outerPos xs <- get
    case xs of
        WithPos innerPos x : xs' -> do
            a <- mexcept (Err n innerPos (Set.singleton exp)) (f innerPos x)
            modify (\st -> st { stCount = n + 1, stInput = xs' })
            pure a
        [] -> throwError
            $ Err n outerPos (Set.singleton "continuation of token sequence")

-- | Succeeds only when current input sequence (may be nested in sexpr) is empty
end :: Parser ()
end = get >>= \(St n _ inp) -> case inp of
    [] -> pure ()
    WithPos p _ : _ ->
        throwError (Err n p (Set.singleton "end of (nested) token sequence"))

mexcept :: MonadError e m => e -> Maybe a -> m a
mexcept e = maybe (throwError e) pure

lookAhead :: Parser a -> Parser a
lookAhead pa = get >>= \s -> pa >>= \a -> put s $> a

try :: Parser a -> Parser a
try ma = do
    s <- get
    catchError ma $ \e -> do
        put s
        throwError (e { errLength = stCount s })

getSrcPos :: Parser SrcPos
getSrcPos = lookAhead (token "token" (Just .* const))

M src/Pretty.hs => src/Pretty.hs +26 -0
@@ 13,6 13,7 @@ import qualified Data.Text.Prettyprint.Doc as Prettyprint

import Misc
import SrcPos
import qualified Lexd
import qualified Parsed
import qualified Inferred
import qualified Optimized as Ast


@@ 33,6 34,31 @@ instance Pretty a => Pretty (WithPos a) where
    pretty' d = pretty' d . unpos


instance Pretty Lexd.Keyword where
    pretty' _ = \case
        Lexd.Kcolon -> ":"
        Lexd.Kdot -> "."
        Lexd.Kforall -> "forall"
        Lexd.KFun -> "Fun"
        Lexd.KBox -> "Box"
        Lexd.Kdefine -> "define"
        Lexd.KdefineColon -> "define:"
        Lexd.Kimport -> "import"
        Lexd.Kextern -> "extern"
        Lexd.Kdata -> "data"
        Lexd.Kfmatch -> "fmatch"
        Lexd.Kmatch -> "match"
        Lexd.Kcase -> "case"
        Lexd.Kif -> "if"
        Lexd.Kfun -> "fun"
        Lexd.Klet1 -> "let1"
        Lexd.Klet -> "let"
        Lexd.Kletrec -> "letrec"
        Lexd.Ksizeof -> "sizeof"
        Lexd.KidAt -> "id@"
        Lexd.KIdAt -> "Id@"


instance Pretty Parsed.Program where
    pretty' = prettyProg
instance Pretty Parsed.Extern where

M test/SystemSpec.hs => test/SystemSpec.hs +16 -5
@@ 2,9 2,12 @@

module SystemSpec where

import Prelude hiding (lex)

import Data.Data
import Data.Functor
import Control.Monad
import Control.Monad.Except
import System.Directory
import System.FilePath
import Data.List


@@ 12,7 15,9 @@ import Test.Hspec
import System.IO.Silently

import Misc
import Lex
import Parse
import qualified Parsed
import Check
import Compile
import Monomorphize


@@ 35,9 40,9 @@ spec = do
        fs <- runIO $ listDirectory d <&> filter isSourceFile
        forM_ fs $ \f -> do
            expectedErr <- runIO $ fmap (drop 3 . head . lines) (readFile (d </> f))
            result <- runIO $ parse (d </> f)
            result <- runIO $ lexAndParse (d </> f)
            it (dropExtension f) $ shouldSatisfy (fmap typecheck result) $ \case
                Right (Left e) -> show (toConstr e) == expectedErr
                Just (Left e) -> show (toConstr e) == expectedErr
                _ -> False
    describe "Examples compile" $ do
        let d = "examples"


@@ 68,6 73,12 @@ compile' f =
            Just ast -> compile f cfg ast $> True

frontend :: FilePath -> IO (Maybe Ast.Program)
frontend f = parse f <&> \case
    Left _ -> Nothing
    Right ast -> fmap (optimize . monomorphize) (rightToMaybe (typecheck ast))
frontend f = lexAndParse f <&> \case
    Nothing -> Nothing
    Just ast -> fmap (optimize . monomorphize) (rightToMaybe (typecheck ast))

lexAndParse :: FilePath -> IO (Maybe Parsed.Program)
lexAndParse f = fmap rightToMaybe (runExceptT (lex' f >>= parse''))
  where
    lex' = withExceptT (const ()) . lex
    parse'' = withExceptT (const ()) . liftEither . runExcept . parse