~jojo/Carth

8ce489259582f2743967d8f36be552c5183cb912 — JoJo a month ago 83f3771 master
Intern strings in Lower & simplify codegen as a consequence
6 files changed, 120 insertions(+), 83 deletions(-)

M carth.cabal
M src/Back/Codegen.hs
M src/Back/Gen.hs
M src/Back/Low.hs
M src/Back/Lower.hs
M src/Front/Err.hs
M carth.cabal => carth.cabal +1 -0
@@ 74,6 74,7 @@ library
    , prettyprinter
    , process
    , utf8-string
    , vector
  default-extensions:
      LambdaCase
    , TupleSections

M src/Back/Codegen.hs => src/Back/Codegen.hs +35 -11
@@ 10,6 10,7 @@ import LLVM.AST.Type hiding (ptr)
import LLVM.AST.DataLayout
import qualified LLVM.AST.Type as LLType
import qualified LLVM.AST.Constant as LLConst
import qualified Codec.Binary.UTF8.String as UTF8.String
import Data.String
import System.FilePath
import Control.Monad.Reader


@@ 20,6 21,7 @@ import qualified Data.Set as Set
import Data.List
import Data.Function
import Data.Maybe
import qualified Data.Vector as Vec
import Lens.Micro.Platform (use, assign, view)

import Misc


@@ 43,7 45,7 @@ instance Select Gen Val where
    selectDeref = genDeref

codegen :: DataLayout -> ShortByteString -> FilePath -> Program -> Module
codegen layout triple moduleFilePath (Program (Topo defs) tdefs externs) =
codegen layout triple moduleFilePath (Program (Topo defs) tdefs externs strs) =
    let (tdefs', externs', globDefs) =
            let (enums, tdefs'') = runGen' (defineDataTypes tdefs)
                defs' = defToVarDefs =<< defs


@@ 55,13 57,14 @@ codegen layout triple moduleFilePath (Program (Topo defs) tdefs externs) =
                    $ withExternSigs externs
                    $ withGlobFunSigs funDefs
                    $ withGlobVarSigs varDefs
                    $ do
                    $ withStrLits strs
                    $ \strDefs -> do
                          es <- genExterns externs
                          funDefs' <- mapM genGlobFunDef funDefs
                          varDecls <- mapM genGlobVarDecl varDefs
                          init_ <- genInit varDefs
                          main <- genMain
                          let ds = main : init_ ++ join funDefs' ++ varDecls
                          let ds = strDefs ++ main : init_ ++ join funDefs' ++ varDecls
                          pure (tdefs'', es, ds)
    in  Module
            { moduleName = fromString (takeBaseName moduleFilePath)


@@ 93,6 96,30 @@ codegen layout triple moduleFilePath (Program (Topo defs) tdefs externs) =
            pure (v, (LLType.ptr t', mkName (mangleName (x, us))))
        augment globalEnv (Map.fromList sigs') ga

    withStrLits lits f = do
        (defs, refs) <- fmap unzip $ mapM globStrVar (Vec.toList lits)
        locallySet Back.Gen.strLits (Vec.fromList refs) $ f (concat defs)

    globStrVar s = do
        strName <- newName "strlit"
        name_inner <- newName "strlit_inner"
        let bytes = UTF8.String.encode s
            len = length bytes
            tInner = ArrayType (fromIntegral len) i8
            defInner =
                simpleGlobConst name_inner tInner (LLConst.Array i8 (map litI8' bytes))
            inner = LLConst.GlobalReference (LLType.ptr tInner) name_inner
            ptrBytes = LLConst.BitCast inner typeGenericPtr
            array =
                litStructNamed ("Array", [Ast.TPrim (TNat 8)]) [ptrBytes, litI64' len]
            str = litStructNamed ("Str", []) [array]
            defStr = simpleGlobConst strName typeStr str
            ref = VVar $ ConstantOperand
                (LLConst.GlobalReference (LLType.ptr typeStr) strName)
        pure (map GlobalDefinition [defInner, defStr], ref)

-- TODO: Use more specialized monad or none at all.
--
-- | A data-type is a tagged union, and we represent it in LLVM as a representing struct
--   of a tagged union, an untagged struct, an integer, or a zero-sized empty array,
--   depending on how many variants and members it has.


@@ 174,7 201,7 @@ genMain = do
            (mkName "carth_init")
    assign currentBlockLabel (mkName "entry")
    assign currentBlockInstrs []
    Out basicBlocks _ _ <- execWriterT $ do
    Out basicBlocks _ <- execWriterT $ do
        emitDo' =<< callBuiltin "install_stackoverflow_handler" []
        emitDo (callIntern Nothing init_ [(null' typeGenericPtr, []), (litUnit, [])])
        iof <- lookupVar (TypedVar "main" mainType)


@@ 269,13 296,10 @@ genConst :: Ast.Const -> Gen Val
genConst = \case
    Int n -> pure (VLocal (litI64 n))
    F64 x -> pure (VLocal (litF64 x))
    Str s -> genStrLit s
    Str s -> getStrLit s

genStrLit :: String -> Gen Val
genStrLit s = do
    var <- newName "strlit"
    scribe outStrings [(var, s)]
    pure $ VVar $ ConstantOperand (LLConst.GlobalReference (LLType.ptr typeStr) var)
getStrLit :: Word -> Gen Val
getStrLit s = view strLits <&> (Vec.! (fromIntegral s))

class TailVal v where
    propagate :: Val -> Gen v


@@ 377,7 401,7 @@ genDecisionTree = \case
        (matchee, selections') <- select selector selections
        let cs' = Map.toAscList cs
        let genCase (s, dt) next = do
                s' <- genStrLit s
                s' <- getStrLit s
                isMatch <- genStrEq matchee s'
                -- Do some wrapping to preserve effect order
                pure $ genCondBr isMatch (genDecisionTree dt selections') next

M src/Back/Gen.hs => src/Back/Gen.hs +10 -27
@@ 10,8 10,9 @@ import Control.Monad.Writer
import Control.Monad.State
import Control.Monad.Reader
import Control.Applicative
import qualified Codec.Binary.UTF8.String as UTF8.String
import Data.Map (Map)
import Data.Vector (Vector)
import qualified Data.Vector as Vec
import Data.Word
import Data.Foldable
import Data.Bifunctor


@@ 57,6 58,7 @@ data Env = Env
    , _enumTypes :: Map Name Word32
    , _dataTypes :: Map Name [Type]
    , _builtins :: Map String ([Parameter], Type)
    , _strLits :: Vector Val
    }

data St = St


@@ 74,7 76,6 @@ type Gen' = StateT St (Reader Env)
--   function that must be generated at the top-level.
data Out = Out
    { _outBlocks :: [BasicBlock]
    , _outStrings :: [(Name, String)]
    , _outFuncs :: [(Name, [TypedVar], TypedVar, Gen Type)]
    }



@@ 90,9 91,9 @@ makeLenses ''Out


instance Semigroup Out where
    Out bs1 ss1 fs1 <> Out bs2 ss2 fs2 = Out (bs1 <> bs2) (ss1 <> ss2) (fs1 <> fs2)
    Out bs1 fs1 <> Out bs2 fs2 = Out (bs1 <> bs2) (fs1 <> fs2)
instance Monoid Out where
    mempty = Out [] [] []
    mempty = Out [] []

instance Typed Val where
    typeOf = \case


@@ 108,7 109,7 @@ genFunDef :: (Name, [TypedVar], TypedVar, Gen Type) -> Gen' (Global, [Definition
genFunDef (name, fvs, ptv@(TypedVar px pt), genBody) = do
    assign currentBlockLabel (mkName "entry")
    assign currentBlockInstrs []
    ((rt, fParams), Out basicBlocks globStrings lambdaFuncs) <- runWriterT $ do
    ((rt, fParams), Out basicBlocks lambdaFuncs) <- runWriterT $ do
        (capturesParam, captureMembers) <- genExtractCaptures
        pt' <- genType pt
        px' <- newName px


@@ 116,28 117,12 @@ genFunDef (name, fvs, ptv@(TypedVar px pt), genBody) = do
        rt' <- withVal ptv pRef (withVals captureMembers genBody)
        let fParams' = [uncurry Parameter capturesParam [], Parameter pt' px' []]
        pure (rt', fParams')
    ss <- mapM globStrVar globStrings
    ls <- fmap
        concat
        (mapM (fmap (uncurry ((:) . GlobalDefinition)) . genFunDef) lambdaFuncs)
    let f = internFunc name fParams rt basicBlocks
    pure (f, concat ss ++ ls)
    pure (f, ls)
  where
    globStrVar (strName, s) = do
        name_inner <- newName' "strlit_inner"
        let bytes = UTF8.String.encode s
            len = length bytes
            tInner = ArrayType (fromIntegral len) i8
            defInner =
                simpleGlobConst name_inner tInner (LLConst.Array i8 (map litI8' bytes))
            inner = LLConst.GlobalReference (LLType.ptr tInner) name_inner
            ptrBytes = LLConst.BitCast inner typeGenericPtr
            array =
                litStructNamed ("Array", [Ast.TPrim (TNat 8)]) [ptrBytes, litI64' len]
            str = litStructNamed ("Str", []) [array]
            defStr = simpleGlobConst strName typeStr str
        pure (map GlobalDefinition [defInner, defStr])

    genExtractCaptures :: Gen ((Type, Name), [(TypedVar, Val)])
    genExtractCaptures = do
        capturesName <- newName "captures"


@@ 220,6 205,7 @@ runGen' g = runReader (evalStateT g initSt) initEnv
                  , _enumTypes = Map.empty
                  , _dataTypes = Map.empty
                  , _builtins = Map.empty
                  , _strLits = Vec.empty
                  }
    initSt = St { _currentBlockLabel = "entry"
                , _currentBlockInstrs = []


@@ 861,11 847,8 @@ commitToNewBlock t l = do
    assign currentBlockLabel l
    assign currentBlockInstrs []

newName :: String -> Gen Name
newName = lift . newName'

newName' :: String -> Gen' Name
newName' s = fmap (mkName . ((s ++ "_") ++) . show) (registerCount <<+= 1)
newName :: MonadState St m => String -> m Name
newName s = fmap (mkName . ((s ++ "_") ++) . show) (registerCount <<+= 1)

lookupEnum :: MonadReader Env m => Ast.TConst -> m (Maybe Word32)
lookupEnum tc = view (enumTypes . to (tconstLookup tc))

M src/Back/Low.hs => src/Back/Low.hs +9 -3
@@ 7,11 7,11 @@ import Data.Map (Map)
import qualified Data.Set as Set
import Data.Set (Set)
import Data.Bifunctor
import Data.Vector (Vector)

import Misc
import Front.Checked (VariantIx, Span)
import FreeVars
import Front.Parsed (Const(..))
import Front.TypeAst hiding (TConst)
import qualified Front.TypeAst as TypeAst
import Front.Monomorphic (Access'(..), Virt(..))


@@ 37,7 37,7 @@ type VarBindings = [(TypedVar, Access)]
data DecisionTree
    = DLeaf (VarBindings, Expr)
    | DSwitch Span Access (Map VariantIx DecisionTree) DecisionTree
    | DSwitchStr Access (Map String DecisionTree) DecisionTree
    | DSwitchStr Access (Map Word DecisionTree) DecisionTree
    deriving Show

type Ction = (VariantIx, Span, TConst, [Expr])


@@ 45,6 45,12 @@ type Fun = (TypedVar, (Expr, Type))

type Var = (Virt, TypedVar)

data Const
    = Int Int
    | F64 Double
    | Str Word
    deriving (Show, Eq)

data Expr
    = Lit Const
    | Var Var


@@ 67,7 73,7 @@ type FunDef = (TypedVar, (Inst, Fun))
type Datas = Map TConst [VariantTypes]
type Externs = [(String, Type)]

data Program = Program Defs Datas Externs
data Program = Program Defs Datas Externs (Vector String)
    deriving Show

instance TypeAst Type where

M src/Back/Lower.hs => src/Back/Lower.hs +63 -42
@@ 1,83 1,104 @@
module Back.Lower (lower, builtinExterns) where

import Control.Applicative
import Control.Monad.State
import Data.Bifunctor
import Data.Bitraversable
import Data.Functor
import Data.List
import qualified Data.Map as Map
import Data.Map (Map)
import qualified Data.Vector as Vec

import Misc
import qualified Front.Monomorphic as Ast
import qualified Front.Monomorphize as Monomorphize
import Back.Low

type StrLits = Map String Word

type Lower = State StrLits

lower :: Ast.Program -> Program
lower (Ast.Program defs datas externs) =
    Program (lowerDefs defs) (lowerDatas datas) (lowerExterns externs)
lower (Ast.Program defs datas externs) = flip evalState Map.empty $ do
    defs' <- lowerDefs defs
    strs <- get
    let strs' = Vec.fromList $ map fst $ sortOn snd $ Map.toList strs
    pure (Program defs' (lowerDatas datas) (lowerExterns externs) strs')

builtinExterns :: Map String Type
builtinExterns = fmap lowerType Monomorphize.builtinExterns

lowerDefs :: Ast.Defs -> Defs
lowerDefs (Topo defs) = Topo $ map lowerDef defs
lowerDefs :: Ast.Defs -> Lower Defs
lowerDefs (Topo defs) = fmap Topo $ mapM lowerDef defs

lowerDef :: Ast.Def -> Def
lowerDef :: Ast.Def -> Lower Def
lowerDef = \case
    Ast.VarDef d -> VarDef $ lowerVarDef d
    Ast.RecDefs ds -> RecDefs $ lowerRecDefs ds
    Ast.VarDef d -> fmap VarDef $ lowerVarDef d
    Ast.RecDefs ds -> fmap RecDefs $ lowerRecDefs ds

lowerVarDef :: Ast.VarDef -> VarDef
lowerVarDef = bimap lowerTypedVar (bimap (map lowerType) lowerExpr)
lowerVarDef :: Ast.VarDef -> Lower VarDef
lowerVarDef = bimapM (pure . lowerTypedVar) (bimapM (pure . map lowerType) lowerExpr)

lowerRecDefs :: Ast.RecDefs -> RecDefs
lowerRecDefs = map lowerFunDef
lowerRecDefs :: Ast.RecDefs -> Lower RecDefs
lowerRecDefs = mapM lowerFunDef

lowerFunDef :: Ast.FunDef -> FunDef
lowerFunDef = bimap lowerTypedVar (bimap (map lowerType) lowerFun)
lowerFunDef :: Ast.FunDef -> Lower FunDef
lowerFunDef = bimapM (pure . lowerTypedVar) (bimapM (pure . map lowerType) lowerFun)

lowerFun :: Ast.Fun -> Fun
lowerFun = bimap lowerTypedVar (bimap lowerExpr lowerType)
lowerFun :: Ast.Fun -> Lower Fun
lowerFun = bimapM (pure . lowerTypedVar) (bimapM lowerExpr (pure . lowerType))

lowerExpr :: Ast.Expr -> Expr
lowerExpr :: Ast.Expr -> Lower Expr
lowerExpr = \case
    Ast.Lit c -> Lit c
    Ast.Var v -> Var $ second lowerTypedVar v
    Ast.Lit (Ast.Int n) -> pure $ Lit (Int n)
    Ast.Lit (Ast.F64 x) -> pure $ Lit (F64 x)
    Ast.Lit (Ast.Str s) -> fmap (Lit . Str) (internStrLit s)
    Ast.Var v -> pure $ Var $ second lowerTypedVar v
    Ast.App f a -> lowerApp f [a]
    Ast.If p c a -> If (lowerExpr p) (lowerExpr c) (lowerExpr a)
    Ast.Fun f -> Fun (lowerFun f)
    Ast.Let d e -> Let (lowerDef d) (lowerExpr e)
    Ast.Match m dt -> Match (lowerExpr m) (lowerDecisionTree dt)
    Ast.Ction c -> Ction $ lowerCtion c
    Ast.Sizeof t -> Sizeof $ lowerType t
    Ast.Absurd t -> Absurd $ lowerType t
    Ast.If p c a -> liftA3 If (lowerExpr p) (lowerExpr c) (lowerExpr a)
    Ast.Fun f -> fmap Fun (lowerFun f)
    Ast.Let d e -> liftA2 Let (lowerDef d) (lowerExpr e)
    Ast.Match m dt -> liftA2 Match (lowerExpr m) (lowerDecisionTree dt)
    Ast.Ction c -> fmap Ction $ lowerCtion c
    Ast.Sizeof t -> pure $ Sizeof $ lowerType t
    Ast.Absurd t -> pure $ Absurd $ lowerType t

internStrLit :: String -> Lower Word
internStrLit s = get >>= \m -> case Map.lookup s m of
    Just n -> pure n
    Nothing -> let n = fromIntegral (Map.size m) in modify (Map.insert s n) $> n

-- | Performs a sort of beta reduction
lowerApp :: Ast.Expr -> [Ast.Expr] -> Expr
lowerApp :: Ast.Expr -> [Ast.Expr] -> Lower Expr
lowerApp = curry $ \case
    (Ast.Fun (p, (b, _)), a : as) -> Let
        (VarDef
            ( lowerTypedVar p
            -- FIXME: This pos is pretty bad probably?
            , (uniqueInst, lowerExpr a)
            )
        )
    (Ast.Fun (p, (b, _)), a : as) -> liftA2
        Let
        (fmap (VarDef . (lowerTypedVar p, ) . (uniqueInst, )) (lowerExpr a))
        (lowerApp b as)
    (Ast.App f a, as) -> lowerApp f (a : as)
    (f, []) -> lowerExpr f
    (f, as) -> App (lowerExpr f) (map lowerExpr as)
    (f, as) -> liftA2 App (lowerExpr f) (mapM lowerExpr as)
    where uniqueInst = []

lowerDecisionTree :: Ast.DecisionTree -> DecisionTree
lowerDecisionTree :: Ast.DecisionTree -> Lower DecisionTree
lowerDecisionTree = \case
    Ast.DLeaf (bs, e) -> DLeaf (map (bimap lowerTypedVar lowerAccess) bs, lowerExpr e)
    Ast.DSwitch span a cs def ->
        DSwitch span (lowerAccess a) (fmap lowerDecisionTree cs) (lowerDecisionTree def)
    Ast.DSwitchStr a cs def ->
        DSwitchStr (lowerAccess a) (fmap lowerDecisionTree cs) (lowerDecisionTree def)
    Ast.DLeaf (bs, e) -> liftA2 (DLeaf .* (,))
                                (pure $ map (bimap lowerTypedVar lowerAccess) bs)
                                (lowerExpr e)
    Ast.DSwitch span a cs def -> liftA2 (DSwitch span (lowerAccess a))
                                        (mapM lowerDecisionTree cs)
                                        (lowerDecisionTree def)
    Ast.DSwitchStr a cs def -> liftA2
        (DSwitchStr (lowerAccess a))
        (fmap Map.fromList $ mapM (bimapM internStrLit lowerDecisionTree) $ Map.toList cs)
        (lowerDecisionTree def)

lowerAccess :: Ast.Access -> Access
lowerAccess = fmap lowerType

lowerCtion :: Ast.Ction -> Ction
lowerCtion (i, s, tc, es) = (i, s, lowerTConst tc, map lowerExpr es)
lowerCtion :: Ast.Ction -> Lower Ction
lowerCtion (i, s, tc, es) = fmap (i, s, lowerTConst tc, ) $ mapM lowerExpr es

lowerDatas :: Ast.Datas -> Datas
lowerDatas = Map.fromList . map (bimap lowerTConst (map (map lowerType))) . Map.toList

M src/Front/Err.hs => src/Front/Err.hs +2 -0
@@ 87,6 87,8 @@ posd = posd' "Error"

posd' :: String -> SrcPos -> Message -> IO ()
posd' kind (pos@(SrcPos f lineN colN inExp)) msg = do
    -- TODO: Keep source files in memory. They don't take up much space, and there's no
    --       risk of them coming out of sync due to new changes.
    src <- readFile f
    let (lineN', colN') = (fromIntegral lineN, fromIntegral colN)
        lines' = lines src