~jojo/Carth

6e4513a8924dc07557cbe9ccde67fb19868b1531 — JoJo 1 year, 1 month ago a07d1ce
Add primitive, single pattern macros

E.g. `(defmacro (plus a b) (+ a b))`
10 files changed, 201 insertions(+), 79 deletions(-)

M TODO.org
M app/Main.hs
M carth.cabal
M src/Err.hs
M src/Lex.hs
M src/Lexd.hs
A src/Macro.hs
M src/Parse.hs
M src/Parser.hs
M src/Pretty.hs
M TODO.org => TODO.org +27 -0
@@ 705,3 705,30 @@ Features and other stuff to do/implement in/around Carth.
  negatives. I feel it would be nice as a user to be able to inspect
  the .carth source of the stdlib and actually see all the types and
  stuff though.
* INACTIVE Union types
  Like Typescript (I think, I'm not all that familiar with it). Could
  be nice for error handling, for example. That's one of the problems
  in Rust -- you have to use all these fancy crates or write a bunch
  of boilerplate just to allow a function to return two different
  types of errors.

  Java, where exceptions can be combined as a union, essentially:
  #+BEGIN_SRC java
  public Foo foo() throws SomeException, OtherException {
      bar(); // throws SomeException
      baz(); // throws OtherException
  }
  #+END_SRC

  and Rust, where you have to combine the different types somehow:
  #+BEGIN_SRC rust
  fn foo() -> Result<Foo, MyErr> {
      bar().map_err(MySomeErr)?;
      baz().map_err(MyOtherErr)?;
  }

  enum MyErr {
      MySomeErr(SomeErr),
      MyOtherErr(OtherErr)
  }
  #+END_SRC

M app/Main.hs => app/Main.hs +9 -1
@@ 22,6 22,7 @@ import Optimize
import qualified Optimized as Ast
import qualified Parse
import qualified Lex
import qualified Macro
import EnvVars

main :: IO ()


@@ 59,8 60,10 @@ frontend cfg f = do
    verbose cfg ("   Lexing")
    tts <- lex f
    when d $ writeFile ".dbg.lexd" (show tts)
    verbose cfg ("   Expanding macros")
    tts' <- expandMacros f tts
    verbose cfg ("   Parsing")
    ast <- parse f tts
    ast <- parse f tts'
    when d $ writeFile ".dbg.parsed" (pretty ast)
    verbose cfg ("   Typechecking")
    ann <- typecheck' f ast


@@ 79,6 82,11 @@ lex f = runExceptT (Lex.lex f) >>= \case
  where
    formatLexErr e = let ss = lines e in (unlines ((head ss ++ " Error:") : tail ss))

expandMacros :: FilePath -> [Lexd.TokenTree] -> IO [Lexd.TokenTree]
expandMacros f tts = case runExcept (Macro.expandMacros tts) of
    Left e -> Err.printMacroErr e >> abort f
    Right p -> pure p

parse :: FilePath -> [Lexd.TokenTree] -> IO Parsed.Program
parse f tts = case runExcept (Parse.parse tts) of
    Left e -> Err.printParseErr e >> abort f

M carth.cabal => carth.cabal +1 -0
@@ 40,6 40,7 @@ library
      Optimized
      Lex
      Lexd
      Macro
      Parse
      Parser
      Parsed

M src/Err.hs => src/Err.hs +3 -0
@@ 16,6 16,9 @@ import Gen

type Message = String

printMacroErr :: (SrcPos, String) -> IO ()
printMacroErr (p, msg) = posd p msg

printParseErr :: (SrcPos, String) -> IO ()
printParseErr (p, msg) = posd p msg


M src/Lex.hs => src/Lex.hs +33 -24
@@ 1,5 1,12 @@
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds #-}

-- Note: Some parsers are greedy wrt consuming spaces and comments succeding the
--       item, while others are lazy. You'll have to look at the impl to be
--       sure.
--
--       If a parser has a variant with a "ns_" prefix, that variant does not
--       consume succeding space, while the unprefixed variant does.

module Lex (lex, toplevel, tokentree) where

import Control.Monad


@@ 110,30 117,32 @@ strlit = andSkipSpaceAfter ns_strlit
    where ns_strlit = char '"' >> manyTill Lexer.charLiteral (char '"')

keyword :: Lexer Keyword
keyword = andSkipSpaceAfter $ choice $ map
    (\p -> try (p <* notFollowedBy identLetter))
    [ string ":" $> Kcolon
    , string "." $> Kdot
    , string "Fun" $> KFun
    , string "Box" $> KBox
    , string "define" $> Kdefine
    , string "define:" $> KdefineColon
    , string "extern" $> Kextern
    , string "forall" $> Kforall
    , string "fmatch" $> Kfmatch
    , string "match" $> Kmatch
    , string "if" $> Kif
    , string "fun" $> Kfun
    , string "let1" $> Klet1
    , string "let" $> Klet
    , string "letrec" $> Kletrec
    , string "data" $> Kdata
    , string "sizeof" $> Ksizeof
    , string "import" $> Kimport
    , string "case" $> Kcase
    , string "id@" $> KidAt
    , string "Id@" $> KIdAt
    ]
keyword = andSkipSpaceAfter $ choice $ (++)
    (map
        (\p -> try (p <* notFollowedBy identLetter))
        [ string ":" $> Kcolon
        , string "." $> Kdot
        , string "Fun" $> KFun
        , string "Box" $> KBox
        , string "define" $> Kdefine
        , string "define:" $> KdefineColon
        , string "extern" $> Kextern
        , string "forall" $> Kforall
        , string "fmatch" $> Kfmatch
        , string "match" $> Kmatch
        , string "if" $> Kif
        , string "fun" $> Kfun
        , string "let1" $> Klet1
        , string "let" $> Klet
        , string "letrec" $> Kletrec
        , string "data" $> Kdata
        , string "sizeof" $> Ksizeof
        , string "import" $> Kimport
        , string "case" $> Kcase
        , string "defmacro" $> Kdefmacro
        ]
    )
    [string "id@" $> KidAt, string "Id@" $> KIdAt]

keyword' :: String -> Lexer ()
keyword' x = andSkipSpaceAfter $ label ("keyword " ++ x) (string x) $> ()

M src/Lexd.hs => src/Lexd.hs +1 -0
@@ 22,6 22,7 @@ data Keyword
    | Klet1 | Klet | Kletrec
    | Ksizeof
    | KidAt | KIdAt
    | Kdefmacro
    deriving (Eq, Show)

data TokenTree'

A src/Macro.hs => src/Macro.hs +70 -0
@@ 0,0 1,70 @@
{-# LANGUAGE LambdaCase #-}

module Macro (expandMacros) where

import Control.Applicative
import Control.Monad.Except
import Control.Monad.Reader
import Control.Monad.State
import Data.Map (Map)
import qualified Data.Map as Map

import SrcPos
import Lexd
import Parser

type Macros = Map String ([String], [TokenTree])
type Bindings = Map String TokenTree
type Expand = ReaderT Bindings (StateT Macros (Except (SrcPos, String)))

expandMacros :: [TokenTree] -> Except (SrcPos, String) [TokenTree]
expandMacros tts = evalStateT (runReaderT (toplevels tts) Map.empty) Map.empty

toplevels :: [TokenTree] -> Expand [TokenTree]
toplevels = fmap concat . mapM toplevel

toplevel :: TokenTree -> Expand [TokenTree]
toplevel = \case
    WithPos _ (Parens (WithPos _ (Keyword Kdefmacro) : tts)) -> do
        def <- lift $ lift $ runParser pdefmacro tts
        modify (uncurry Map.insert def)
        pure []
    tt -> expand tt

pdefmacro :: Parser (String, ([String], [TokenTree]))
pdefmacro = do
    (x, params) <- parens (liftA2 (,) small' (many small'))
    template <- many anyToken
    pure (x, (params, template))

expand :: TokenTree -> Expand [TokenTree]
expand tt@(WithPos tpos tt') = do
    bs <- ask
    ms <- get
    case tt' of
        Lit _ -> pure [tt]
        Small x -> case Map.lookup x bs of
            Just xtt -> pure [xtt]
            Nothing -> pure [tt]
        Big _ -> pure [tt]
        Keyword _ -> pure [tt]
        Parens (WithPos _ (Small x) : tts) | Just m <- Map.lookup x ms -> do
            tts' <- expands tts
            applyMacro tpos m tts'
        Parens tts -> par Parens tts
        Brackets tts -> par Brackets tts
        Braces tts -> par Braces tts
    where par ctor tts = fmap (pure . WithPos tpos . ctor) (expands tts)

expands :: [TokenTree] -> Expand [TokenTree]
expands = fmap concat . mapM expand

applyMacro :: SrcPos -> ([String], [TokenTree]) -> [TokenTree] -> Expand [TokenTree]
applyMacro appPos (params, template) args = if length params /= length args
    then throwError
        ( appPos
        , "Arity mismatch in application of macro.\n"
        ++ ("Expected " ++ show (length params))
        ++ (", found " ++ show (length args))
        )
    else local (Map.union (Map.fromList (zip params args))) (expands template)

M src/Parse.hs => src/Parse.hs +0 -53
@@ 1,12 1,5 @@
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds #-}

-- Note: Some parsers are greedy wrt consuming spaces and comments succeding the
--       item, while others are lazy. You'll have to look at the impl to be
--       sure.
--
--       If a parser has a variant with a "ns_" prefix, that variant does not
--       consume succeding space, while the unprefixed variant does.

module Parse (parse) where

import Control.Applicative hiding (many, some)


@@ 26,8 19,6 @@ import qualified Lexd
import Parser
import Parsed hiding (Lit)
import qualified Parsed
import Pretty


parse :: [TokenTree] -> Except (SrcPos, String) Program
parse tts = fmap (\(ds, ts, es) -> Program ds ts es) (runParser toplevels tts)


@@ 241,49 232,5 @@ ptype = choice [tfun, tbox, tapp]
tvar :: Parser TVar
tvar = fmap TVExplicit small

parens :: Parser a -> Parser a
parens ma = parens' (const ma)

parens' :: (SrcPos -> Parser a) -> Parser a
parens' = sexpr "`(`" $ \case
    Parens tts -> Just tts
    _ -> Nothing

brackets :: Parser a -> Parser a
brackets ma = brackets' (const ma)

brackets' :: (SrcPos -> Parser a) -> Parser a
brackets' = sexpr "`[`" $ \case
    Brackets tts -> Just tts
    _ -> Nothing

sexpr :: String -> (TokenTree' -> Maybe [TokenTree]) -> (SrcPos -> Parser a) -> Parser a
sexpr expected extract f = do
    (p, tts) <- token expected $ \p' -> fmap (p', ) . extract
    St _ pOld ttsOld <- get
    modify (\st -> st { stOuterPos = p, stInput = tts })
    a <- f p
    end
    modify (\st -> st { stOuterPos = pOld, stInput = ttsOld })
    pure a

big :: Parser (Id 'Big)
big = token "big identifier" $ \p -> \case
    Lexd.Big x -> Just (Id (WithPos p x))
    _ -> Nothing

small :: Parser (Id 'Small)
small = token "small identifier" $ \p -> \case
    Lexd.Small x -> Just (Id (WithPos p x))
    _ -> Nothing

reserved :: Keyword -> Parser ()
reserved k = token ("keyword " ++ pretty k) $ const $ \case
    Keyword k' | k == k' -> Just ()
    _ -> Nothing

isWord :: String -> Bool
isWord s = isJust ((readMaybe s) :: Maybe Word)

withPos :: Parser a -> Parser (WithPos a)
withPos = liftA2 WithPos getSrcPos

M src/Parser.hs => src/Parser.hs +56 -1
@@ 1,7 1,7 @@
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds
           , GeneralizedNewtypeDeriving #-}

module Parser (Parser, St (..), runParser, token, end, lookAhead, try, getSrcPos) where
module Parser where

import Control.Applicative hiding (many, some)
import Control.Monad


@@ 15,6 15,8 @@ import qualified Data.Set as Set
import Misc
import SrcPos
import Lexd
import Parsed
import Pretty

data Err = Err { errLength :: Word, errPos :: SrcPos, errExpecteds :: Set String }
    deriving (Show, Eq)


@@ 92,3 94,56 @@ try ma = do

getSrcPos :: Parser SrcPos
getSrcPos = lookAhead (token "token" (Just .* const))

anyToken :: Parser TokenTree
anyToken = token "any token" (Just .* WithPos)

withPos :: Parser a -> Parser (WithPos a)
withPos = liftA2 WithPos getSrcPos

parens :: Parser a -> Parser a
parens ma = parens' (const ma)

parens' :: (SrcPos -> Parser a) -> Parser a
parens' = sexpr "`(`" $ \case
    Parens tts -> Just tts
    _ -> Nothing

brackets :: Parser a -> Parser a
brackets ma = brackets' (const ma)

brackets' :: (SrcPos -> Parser a) -> Parser a
brackets' = sexpr "`[`" $ \case
    Brackets tts -> Just tts
    _ -> Nothing

sexpr :: String -> (TokenTree' -> Maybe [TokenTree]) -> (SrcPos -> Parser a) -> Parser a
sexpr expected extract f = do
    (p, tts) <- token expected $ \p' -> fmap (p', ) . extract
    St _ pOld ttsOld <- get
    modify (\st -> st { stOuterPos = p, stInput = tts })
    a <- f p
    end
    modify (\st -> st { stOuterPos = pOld, stInput = ttsOld })
    pure a

big' :: Parser String
big' = fmap idstr big

big :: Parser (Id 'Parsed.Big)
big = token "big identifier" $ \p -> \case
    Lexd.Big x -> Just (Id (WithPos p x))
    _ -> Nothing

small' :: Parser String
small' = fmap idstr small

small :: Parser (Id 'Parsed.Small)
small = token "small identifier" $ \p -> \case
    Lexd.Small x -> Just (Id (WithPos p x))
    _ -> Nothing

reserved :: Keyword -> Parser ()
reserved k = token ("keyword " ++ pretty k) $ const $ \case
    Keyword k' | k == k' -> Just ()
    _ -> Nothing

M src/Pretty.hs => src/Pretty.hs +1 -0
@@ 57,6 57,7 @@ instance Pretty Lexd.Keyword where
        Lexd.Ksizeof -> "sizeof"
        Lexd.KidAt -> "id@"
        Lexd.KIdAt -> "Id@"
        Lexd.Kdefmacro -> "defmacro"


instance Pretty Parsed.Program where