~jojo/Carth

b2cf053415087f7707c5a9b74a16498b509150d1 — JoJo 1 year, 10 months ago 4d62a17
Refactor Codegen. Separate module for ABI-related stuff

Also had to separate a module for the Gen-monad, as Haskell can't have
cyclical imports.
3 files changed, 431 insertions(+), 398 deletions(-)

A src/Abi.hs
M src/Codegen.hs
A src/Gen.hs
A src/Abi.hs => src/Abi.hs +198 -0
@@ 0,0 1,198 @@
{-# LANGUAGE LambdaCase #-}

-- | Stuff relating to structure layout and calling convention
--
--   One might think that simply declaring all function definitions and function
--   calls as being of the same LLVM calling convention (e.g. "ccc") would allow
--   us to pass arguments and return results as we please, and everything will
--   be compatible? I sure did, however, that is not the case. To be compatible
--   with C FFIs, we also have to actually conform to the C calling convention,
--   which contains a bunch of details about how more complex types should be
--   passed and returned. Currently, we pass and return simple types by value,
--   and complex types by reference (param by ref, return via sret param).
--
--   See the definition of `passByRef` for up-to-date details about which types
--   are passed how.
module Abi
    ( simpleFunc
    , simpleFunc'
    , simpleGlobVar
    , simpleGlobVar'
    , passByRef
    , passByRef'
    , sizeof
    , tagBitWidth
    , cfg_callConv
    )
where

import LLVM.AST
import qualified LLVM.AST.CallingConvention as LLCallConv
import qualified LLVM.AST.Linkage as LLLink
import qualified LLVM.AST.Visibility as LLVis
import qualified LLVM.AST.Constant as LLConst
import LLVM.AST.Global (Parameter)
import qualified LLVM.AST.Global as LLGlob
import qualified LLVM.AST.AddrSpace as LLAddr
import qualified LLVM.AST.FunctionAttribute as LLFnAttr
import Control.Monad.Writer
import qualified Data.Map as Map
import Data.Word
import Data.Foldable
import Data.Composition
import Control.Lens (views)

import Misc
import MonoAst (Span)
import Gen


simpleFunc :: Name -> [Parameter] -> Type -> [BasicBlock] -> Global
simpleFunc = ($ []) .** simpleFunc'

simpleFunc'
    :: Name
    -> [Parameter]
    -> Type
    -> [LLFnAttr.FunctionAttribute]
    -> [BasicBlock]
    -> Global
simpleFunc' n ps rt fnAttrs bs = Function
    { LLGlob.linkage = LLLink.External
    , LLGlob.visibility = LLVis.Default
    , LLGlob.dllStorageClass = Nothing
    , LLGlob.callingConvention = cfg_callConv
    , LLGlob.returnAttributes = []
    , LLGlob.returnType = rt
    , LLGlob.name = n
    , LLGlob.parameters = (ps, False)
    , LLGlob.functionAttributes = map Right fnAttrs
    , LLGlob.section = Nothing
    , LLGlob.comdat = Nothing
    , LLGlob.alignment = 0
    , LLGlob.garbageCollectorName = Nothing
    , LLGlob.prefix = Nothing
    , LLGlob.basicBlocks = bs
    , LLGlob.personalityFunction = Nothing
    , LLGlob.metadata = []
    }

simpleGlobVar :: Name -> Type -> LLConst.Constant -> Global
simpleGlobVar name t = simpleGlobVar' name t . Just

simpleGlobVar' :: Name -> Type -> Maybe LLConst.Constant -> Global
simpleGlobVar' name t init = GlobalVariable
    { LLGlob.name = name
    , LLGlob.linkage = LLLink.External
    , LLGlob.visibility = LLVis.Default
    , LLGlob.dllStorageClass = Nothing
    , LLGlob.threadLocalMode = Nothing
    , LLGlob.addrSpace = LLAddr.AddrSpace 0
    , LLGlob.unnamedAddr = Nothing
    , LLGlob.isConstant = True
    , LLGlob.type' = t
    , LLGlob.initializer = init
    , LLGlob.section = Nothing
    , LLGlob.comdat = Nothing
    , LLGlob.alignment = 0
    , LLGlob.metadata = []
    }

passByRef :: Type -> Gen Bool
passByRef = lift . passByRef'

-- NOTE: This post is helpful:
--       https://stackoverflow.com/questions/42411819/c-on-x86-64-when-are-structs-classes-passed-and-returned-in-registers
--       Also, official docs:
--       https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
--       particularly section 3.2.3 Parameter Passing (p18).
passByRef' :: Type -> Gen' Bool
passByRef' = \case
    NamedTypeReference x -> passByRef' =<< views dataTypes (Map.! x)
    -- Simple scalar types. They go in registers.
    VoidType -> pure False
    IntegerType _ -> pure False
    PointerType _ _ -> pure False
    FloatingPointType _ -> pure False
    -- Functions are not POD (Plain Ol' Data), so they are passed on the stack.
    FunctionType _ _ _ -> pure True
    -- TODO: Investigate how exactly SIMD vectors are to be passed when/if we
    --       ever add support for that in the rest of the compiler.
    VectorType _ _ -> pure True
    -- Aggregate types can either be passed on stack or in regs, depending on
    -- what they contain.
    t@(StructureType _ us) -> do
        size <- sizeof t
        if size > 16 then pure True else fmap or (mapM passByRef' us)
    ArrayType _ u -> do
        size <- sizeof u
        if size > 16 then pure True else passByRef' u
    -- N/A
    MetadataType -> ice "passByRef of MetadataType"
    LabelType -> ice "passByRef of LabelType"
    TokenType -> ice "passByRef of TokenType"

-- TODO: Handle packed
--
-- TODO: Handle different data layouts. Check out LLVMs DataLayout class and
--       impl of `getTypeAllocSize`.
--       https://llvm.org/doxygen/classllvm_1_1DataLayout.html
--
-- | Haskell-native implementation of `sizeof`, in contrast to
--   `getTypeAllocSize` of `llvm-hs`.
--
--   The problem with `getTypeAllocSize` is that it requires an `EncodeAST`
--   monad and messy manipulations. Specifically, I had some recursive bindings
--   going on, but to represent them in a monad I needed `mfix`, but `EncodeAST`
--   didn't have `mfix`!
--
--   See the [System V ABI docs](https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf)
--   for more info.
sizeof :: Type -> Gen' Word64
sizeof = \case
    NamedTypeReference x -> sizeof =<< lookupDatatype x
    IntegerType bits -> pure (fromIntegral (toBytesCeil bits))
    PointerType _ _ -> pure 8
    FloatingPointType HalfFP -> pure 2
    FloatingPointType FloatFP -> pure 4
    FloatingPointType DoubleFP -> pure 8
    FloatingPointType FP128FP -> pure 16
    FloatingPointType X86_FP80FP -> pure 16
    FloatingPointType PPC_FP128FP -> pure 16
    StructureType _ us -> foldlM addMember 0 us
    VectorType n u -> fmap (fromIntegral n *) (sizeof u)
    ArrayType n u -> fmap (n *) (sizeof u)
    VoidType -> ice "sizeof VoidType"
    FunctionType _ _ _ -> ice "sizeof FunctionType"
    MetadataType -> ice "sizeof MetadataType"
    LabelType -> ice "sizeof LabelType"
    TokenType -> ice "sizeof TokenType"
  where
    toBytesCeil nbits = div (nbits + 7) 8
    addMember accSize u = do
        align <- alignmentof u
        let padding = mod (align - accSize) align
        size <- sizeof u
        pure (accSize + padding + size)

alignmentof :: Type -> Gen' Word64
alignmentof = \case
    NamedTypeReference x -> alignmentof =<< lookupDatatype x
    StructureType _ us -> fmap maximum (traverse alignmentof us)
    VectorType _ u -> alignmentof u
    ArrayType _ u -> alignmentof u
    t -> sizeof t

tagBitWidth :: Span -> Maybe Word32
tagBitWidth span'
    | span' <= 2 ^ (0 :: Integer) = Nothing
    | span' <= 2 ^ (8 :: Integer) = Just 8
    | span' <= 2 ^ (16 :: Integer) = Just 16
    | span' <= 2 ^ (32 :: Integer) = Just 32
    | span' <= 2 ^ (64 :: Integer) = Just 64
    | otherwise = ice $ "tagBitWidth: span' = " ++ show span'

-- TODO: Try out "tailcc" - Tail callable calling convention. It looks like
--       exactly what I want!
cfg_callConv :: LLCallConv.CallingConvention
cfg_callConv = LLCallConv.C

M src/Codegen.hs => src/Codegen.hs +156 -398
@@ 1,22 1,6 @@
{-# LANGUAGE OverloadedStrings, LambdaCase, TemplateHaskell, TupleSections
           , FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings, LambdaCase, TupleSections, FlexibleContexts #-}

-- | Generation of LLVM IR code from our monomorphic AST.
--
--   # On ABI / Calling Conventions
--
--   One might think that simply declaring all function definitions and function
--   calls as being of the same LLVM calling convention (e.g. "ccc") would allow
--   us to pass arguments and return results as we please, and everything will
--   be compatible? I sure did, however, that is not the case. To be compatible
--   with C FFIs, we also have to actually conform to the C calling convention,
--   which contains a bunch of details about how more complex types should be
--   passed and returned. Currently, we pass and return simple types by value,
--   and complex types by reference (param by ref, return via sret param).
--
--   See the definition of `passByRef` for up-to-date details about which types
--   are passed how.

module Codegen (codegen) where

import LLVM.AST


@@ 25,15 9,8 @@ import LLVM.AST.Type hiding (ptr)
import LLVM.AST.DataLayout
import LLVM.AST.ParameterAttribute
import qualified LLVM.AST.Type as LLType
import qualified LLVM.AST.CallingConvention as LLCallConv
import qualified LLVM.AST.Linkage as LLLink
import qualified LLVM.AST.Visibility as LLVis
import qualified LLVM.AST.Constant as LLConst
import qualified LLVM.AST.Float as LLFloat
import LLVM.AST.Global (Parameter)
import qualified LLVM.AST.Global as LLGlob
import qualified LLVM.AST.AddrSpace as LLAddr
import qualified LLVM.AST.FunctionAttribute as LLFnAttr
import qualified Codec.Binary.UTF8.String as UTF8.String
import Data.String
import System.FilePath


@@ 47,27 24,18 @@ import qualified Data.Set as Set
import Data.Word
import Data.Foldable
import Data.List
import Data.Composition
import Data.Functor
import Control.Applicative
import Control.Lens
    ( makeLenses
    , modifying
    , scribe
    , (<<+=)
    , (<<.=)
    , use
    , uses
    , assign
    , views
    , locally
    )
    (modifying, scribe, (<<+=), (<<.=), use, uses, assign, views, locally)

import Misc
import FreeVars
import qualified MonoAst
import MonoAst hiding (Type, Const)
import Selections
import Gen
import Abi


-- | An instruction that returns a value. The name refers to the fact that a


@@ 79,44 47,6 @@ data Val
    = VVar Operand
    | VLocal Operand

data Env = Env
    -- TODO: Could operands in env be Val instead? I.e., either stack-allocated
    --       or local?
    { _env :: Map TypedVar Operand  -- ^ Environment of stack allocated variables
    , _dataTypes :: Map Name Type
    }
makeLenses ''Env

data St = St
    { _currentBlockLabel :: Name
    , _currentBlockInstrs :: [Named Instruction]
    , _registerCount :: Word
    -- | Keep track of the parent function name so that we can name the
    --   outermost lambdas of a function definition well.
    , _lambdaParentFunc :: Maybe String
    , _outerLambdaN :: Word
    }
makeLenses ''St

type Gen' = StateT St (Reader Env)

-- | The output of generating a function
data Out = Out
    { _outBlocks :: [BasicBlock]
    , _outStrings :: [(Name, String)]
    , _outFuncs :: [(Name, [TypedVar], TypedVar, Expr)]
    }
makeLenses ''Out

type Gen = WriterT Out Gen'


instance Semigroup Out where
    Out bs1 ss1 fs1 <> Out bs2 ss2 fs2 =
        Out (bs1 <> bs2) (ss1 <> ss2) (fs1 <> fs2)
instance Monoid Out where
    mempty = Out [] [] []

instance Typed Val where
    typeOf = \case
        VVar x -> getPointee (typeOf x)


@@ 154,7 84,7 @@ codegen layout moduleFilePath (Program defs tdefs externs) =
    withDataTypes = augment dataTypes
    withExternSigs es ga = do
        es' <- forM es $ \(name, t) -> do
            t' <- toLlvmType' t
            t' <- genType' t
            pure
                ( TypedVar name t
                , ConstantOperand


@@ 163,7 93,7 @@ codegen layout moduleFilePath (Program defs tdefs externs) =
        augment env (Map.fromList es') ga
    withGlobDefSigs sigs ga = do
        sigs' <- forM sigs $ \(v@(TypedVar x t), (us, _)) -> do
            t' <- toLlvmType' t
            t' <- genType' t
            pure
                ( v
                , ConstantOperand $ LLConst.GlobalReference


@@ 172,20 102,18 @@ codegen layout moduleFilePath (Program defs tdefs externs) =
                )
        augment env (Map.fromList sigs') ga

-- | Convert data-type definitions from `MonoAst` format to LLVM format, and
--   then return them as `Definition`s so that they may be exported in the
--   Module AST.
--
--   A data-type is a tagged union, and is represented in LLVM as a struct where
--   the first element is the variant-index as an i64, and the rest of the
--   elements are the field-types of the largest variant wrt allocation size.
-- | A data-type is a tagged union, and we represent it in LLVM as a struct
--   where, if there are more than 1 variant, the first element is the
--   variant-index. The variant index is represented as an integer with the
--   smallest width 2^n that can fit all variants. The rest of the elements are
--   the field-types of the largest variant wrt allocation size.
defineDataTypes :: TypeDefs -> Gen' (Map Name Type)
defineDataTypes tds = do
    mfix $ \tds' ->
        fmap Map.fromList $ augment dataTypes tds' $ forM tds $ \(tc, vs) -> do
            let n = mkName (mangleTConst tc)
            let totVariants = length vs
            ts <- mapM (toLlvmVariantType (fromIntegral totVariants)) vs
            ts <- mapM (genVariantType (fromIntegral totVariants)) vs
            sizedTs <- mapM (\t -> fmap (\s -> (s, t)) (sizeof t)) ts
            let (_, tmax) = maximum sizedTs
            pure (n, tmax)


@@ 217,7 145,7 @@ genExterns :: [(String, MonoAst.Type)] -> Gen' [Definition]
genExterns = mapM (uncurry genExtern)

genExtern :: String -> MonoAst.Type -> Gen' Definition
genExtern name t = toLlvmType' t
genExtern name t = genType' t
    <&> \t' -> GlobalDefinition $ simpleGlobVar' (mkName name) t' Nothing

genMain :: Gen' Definition


@@ 226,8 154,8 @@ genMain = do
    assign currentBlockInstrs []
    Out basicBlocks _ _ <- execWriterT $ do
        f <- lookupVar (TypedVar "start" startType)
        _ <- app f (VLocal (ConstantOperand litUnit)) typeUnit
        commitFinalFuncBlock (ret (ConstantOperand (litI32 0)))
        _ <- app f (VLocal litUnit) typeUnit
        commitFinalFuncBlock (ret (litI32 0))
    pure (GlobalDefinition (simpleFunc (mkName "main") [] i32 basicBlocks))

-- TODO: Change global defs to a new type that can be generated by llvm. As it


@@ 253,7 181,7 @@ genClosureWrappedFunDef var p body = do
    let fRef = LLConst.GlobalReference (LLType.ptr (typeOf f)) fName
    let capturesType = LLType.ptr typeUnit
    let captures = LLConst.Null capturesType
    let closure = litStruct [captures, fRef]
    let closure = litStruct' [captures, fRef]
    let closureDef = simpleGlobVar (mkName name) (typeOf closure) closure
    pure (closureDef : f : gs)



@@ 267,7 195,7 @@ genFunDef (name, fvs, ptv@(TypedVar px pt), body) = do
    assign currentBlockInstrs []
    ((rt, fParams), Out basicBlocks globStrings lambdaFuncs) <- runWriterT $ do
        (capturesParam, captureLocals) <- genExtractCaptures fvs
        pt' <- toLlvmType pt
        pt' <- genType pt
        px' <- newName px
        -- Load params according to calling convention
        passParamByRef <- passByRef pt'


@@ 299,38 227,39 @@ genFunDef (name, fvs, ptv@(TypedVar px pt), body) = do
    globStrVar (strName, s) = do
        name_inner <- newName' "strlit_inner"
        let bytes = UTF8.String.encode s
            len = fromIntegral (length bytes)
            tInner = ArrayType len i8
            len = length bytes
            tInner = ArrayType (fromIntegral len) i8
            defInner = simpleGlobVar
                name_inner
                tInner
                (LLConst.Array i8 (map litI8 bytes))
                (LLConst.Array i8 (map litI8' bytes))
            inner = LLConst.GlobalReference (LLType.ptr tInner) name_inner
            ptrBytes = LLConst.BitCast inner (LLType.ptr i8)
            array = litStructOfType
            array = litStructNamed'
                ("Array", [TPrim TNat8])
                [ptrBytes, litU64 len]
            str = litStructOfType ("Str", []) [array]
                [ptrBytes, litI64' len]
            str = litStructNamed' ("Str", []) [array]
            defStr = simpleGlobVar strName typeStr str
        pure [defInner, defStr]

genExtractCaptures :: [TypedVar] -> Gen ((Type, Name), [(TypedVar, Operand)])
genExtractCaptures fvs = do
    capturesName <- newName "captures"
    let capturesPtrGenericType = LLType.ptr typeUnit
    let capturesPtrGeneric = LocalReference capturesPtrGenericType capturesName
    let capturesParam = (capturesPtrGenericType, capturesName)
    fmap (capturesParam, ) $ if null fvs
        then pure []
        else do
            capturesType <- typeCaptures fvs
            capturesPtr <- emitAnon
                (bitcast capturesPtrGeneric (LLType.ptr capturesType))
            captures <- emitAnon (load capturesPtr)
            captureVals <- mapM
                (\(TypedVar x _, i) -> emitReg' x =<< extractvalue captures [i])
                (zip fvs [0 ..])
            pure (zip fvs captureVals)
    genExtractCaptures fvs = do
        capturesName <- newName "captures"
        let capturesPtrGenericType = LLType.ptr typeUnit
        let capturesPtrGeneric =
                LocalReference capturesPtrGenericType capturesName
        let capturesParam = (capturesPtrGenericType, capturesName)
        fmap (capturesParam, ) $ if null fvs
            then pure []
            else do
                capturesType <- genCapturesType fvs
                capturesPtr <- emitAnon
                    (bitcast capturesPtrGeneric (LLType.ptr capturesType))
                captures <- emitAnon (load capturesPtr)
                captureVals <- mapM
                    (\(TypedVar x _, i) ->
                        emitReg' x =<< extractvalue captures [i]
                    )
                    (zip fvs [0 ..])
                pure (zip fvs captureVals)

genExpr :: Expr -> Gen Val
genExpr expr = do


@@ 342,93 271,23 @@ genExpr expr = do
        If p c a -> genIf p c a
        Fun p b -> assign lambdaParentFunc parent *> genLambda p b
        Let ds b -> genLet ds b
        Match e cs tbody -> genMatch e cs =<< toLlvmType tbody
        Match e cs tbody -> genMatch e cs =<< genType tbody
        Ction c -> genCtion c
        Box e -> genBox =<< genExpr e
        Deref e -> genDeref e

toLlvmDataType :: MonoAst.TConst -> Type
toLlvmDataType = typeNamed . mangleTConst

toLlvmVariantType :: Span -> [MonoAst.Type] -> Gen' Type
toLlvmVariantType totVariants =
    fmap (typeStruct . maybe id ((:) . IntegerType) (tagBitWidth totVariants))
        . mapM toLlvmType'

toLlvmType :: MonoAst.Type -> Gen Type
toLlvmType = lift . toLlvmType'

-- | Convert to the LLVM representation of a type in an expression-context.
toLlvmType' :: MonoAst.Type -> Gen' Type
toLlvmType' = \case
    TPrim tc -> pure $ case tc of
        TUnit -> typeUnit
        TNat8 -> i8
        TNat16 -> i16
        TNat32 -> i32
        TNat -> i64
        TInt8 -> i8
        TInt16 -> i16
        TInt32 -> i32
        TInt -> i64
        TDouble -> double
        TChar -> i32
        TBool -> typeBool
    TFun a r -> toLlvmClosureType a r
    TBox t -> fmap LLType.ptr (toLlvmType' t)
    TConst t -> pure $ typeNamed (mangleTConst t)

-- | A `Fun` is a closure, and follows a certain calling convention
--
--   A closure is represented as a pair where the first element is the pointer
--   to the structure of captures, and the second element is a pointer to the
--   actual function, which takes as first parameter the captures-pointer, and
--   as second parameter the argument.
--
--   An argument of a structure-type is passed by reference, to be compatible
--   with the C calling convention.
toLlvmClosureType :: MonoAst.Type -> MonoAst.Type -> Gen' Type
toLlvmClosureType a r = toLlvmClosureFunType a r
    <&> \c -> typeStruct [LLType.ptr typeUnit, LLType.ptr c]

-- The type of the function itself within the closure
toLlvmClosureFunType :: MonoAst.Type -> MonoAst.Type -> Gen' Type
toLlvmClosureFunType a r = do
    a' <- toLlvmType' a
    r' <- toLlvmType' r
    passArgByRef <- passByRef' a'
    let a'' = if passArgByRef then LLType.ptr a' else a'
    returnResultByRef <- passByRef' r'
    pure $ if returnResultByRef
        then FunctionType
            { resultType = LLType.void
            , argumentTypes = [LLType.ptr r', LLType.ptr typeUnit, a'']
            , isVarArg = False
            }
        else FunctionType
            { resultType = r'
            , argumentTypes = [LLType.ptr typeUnit, a'']
            , isVarArg = False
            }

genConst :: MonoAst.Const -> Gen Val
genConst = \case
    Unit -> pure (VLocal (ConstantOperand litUnit))
    Int n -> pure (VLocal (ConstantOperand (litI64 n)))
    Double x -> pure (VLocal (ConstantOperand (litDouble x)))
    Char c -> pure (VLocal (ConstantOperand (litI32 (Data.Char.ord c))))
    Unit -> pure (VLocal litUnit)
    Int n -> pure (VLocal (litI64 n))
    Double x -> pure (VLocal (litDouble x))
    Char c -> pure (VLocal (litI32 (Data.Char.ord c)))
    Str s -> do
        var <- newName "strlit"
        scribe outStrings [(var, s)]
        pure $ VVar $ ConstantOperand
            (LLConst.GlobalReference (LLType.ptr typeStr) var)
    Bool b -> pure (VLocal (ConstantOperand (litBool b)))

lookupVar :: TypedVar -> Gen Val
lookupVar x = do
    views env (Map.lookup x) >>= \case
        Just var -> pure (VVar var)
        Nothing -> ice $ "Undefined variable " ++ show x
    Bool b -> pure (VLocal (litBool b))

-- | Beta-reduction and closure application
genApp :: Expr -> Expr -> MonoAst.Type -> Gen Val


@@ 445,7 304,7 @@ genApp fe' ae' rt' = genApp' (fe', [(ae', rt')])
        (fe, aes) -> do
            closure <- genExpr fe
            as <- mapM
                (\(ae, rt) -> liftA2 (,) (genExpr ae) (toLlvmType rt))
                (\(ae, rt) -> liftA2 (,) (genExpr ae) (genType rt))
                aes
            foldlM (\f (a, rt) -> app f a rt) closure as



@@ 503,7 362,7 @@ genLet :: Defs -> Expr -> Gen Val
genLet ds b = do
    let (vs, es) = unzip (Map.toList ds)
    ps <- forM vs $ \(TypedVar n t) -> do
        t' <- toLlvmType t
        t' <- genType t
        emitReg' n (alloca t')
    withVars (zip vs ps) $ do
        forM_ (zip ps es) $ \(p, (_, e)) -> do


@@ 560,7 419,7 @@ genDecisionLeaf (bs, e) selections =

genAs :: Span -> [MonoAst.Type] -> Operand -> Gen Operand
genAs totVariants ts matchee = do
    tvariant <- lift (toLlvmVariantType totVariants ts)
    tvariant <- lift (genVariantType totVariants ts)
    let tgeneric = typeOf matchee
    pGeneric <- emitReg' "ction_ptr_generic" (alloca tgeneric)
    emit (store matchee pGeneric)


@@ 581,21 440,12 @@ genCtion (i, span', dataType, as) = do
            (tagBitWidth span')
    s <- getLocal =<< genStruct (tag as')
    let t = typeOf s
    let tgeneric = toLlvmDataType dataType
    let tgeneric = genDatatypeRef dataType
    pGeneric <- emitReg' "ction_ptr_generic" (alloca tgeneric)
    p <- emitReg' "ction_ptr" (bitcast pGeneric (LLType.ptr t))
    emit (store s p)
    pure (VVar pGeneric)

tagBitWidth :: Span -> Maybe Word32
tagBitWidth span'
    | span' <= 2 ^ (0 :: Integer) = Nothing
    | span' <= 2 ^ (8 :: Integer) = Just 8
    | span' <= 2 ^ (16 :: Integer) = Just 16
    | span' <= 2 ^ (32 :: Integer) = Just 32
    | span' <= 2 ^ (64 :: Integer) = Just 64
    | otherwise = ice $ "tagBitWidth: span' = " ++ show span'

-- TODO: Eta-conversion
-- | A lambda is a pair of a captured environment and a function.  The captured
--   environment must be on the heap, since the closure value needs to be of


@@ 615,7 465,7 @@ genLambda p@(TypedVar px pt) (b, bt) = do
        Just s ->
            fmap (mkName . ((s ++ "_func_") ++) . show) (outerLambdaN <<+= 1)
        Nothing -> newName "func"
    ft <- lift (toLlvmClosureFunType pt bt)
    ft <- lift (genClosureFunType pt bt)
    let
        f = VLocal $ ConstantOperand $ LLConst.GlobalReference
            (LLType.ptr ft)


@@ 649,7 499,7 @@ genBox' x = do

genHeapAlloc :: Type -> Gen Operand
genHeapAlloc t = do
    size <- fmap litU64' (lift (sizeof t))
    size <- fmap (litI64 . fromIntegral) (lift (sizeof t))
    emitAnon (callExtern "carth_alloc" (LLType.ptr typeUnit) [size])

genDeref :: Expr -> Gen Val


@@ 657,61 507,10 @@ genDeref e = genExpr e >>= \case
    VVar x -> fmap VVar (emitAnon (load x))
    VLocal x -> pure (VVar x)

simpleFunc :: Name -> [Parameter] -> Type -> [BasicBlock] -> Global
simpleFunc = ($ []) .** simpleFunc'

simpleFunc'
    :: Name
    -> [Parameter]
    -> Type
    -> [LLFnAttr.FunctionAttribute]
    -> [BasicBlock]
    -> Global
simpleFunc' n ps rt fnAttrs bs = Function
    { LLGlob.linkage = LLLink.External
    , LLGlob.visibility = LLVis.Default
    , LLGlob.dllStorageClass = Nothing
    , LLGlob.callingConvention = cfg_callConv
    , LLGlob.returnAttributes = []
    , LLGlob.returnType = rt
    , LLGlob.name = n
    , LLGlob.parameters = (ps, False)
    , LLGlob.functionAttributes = map Right fnAttrs
    , LLGlob.section = Nothing
    , LLGlob.comdat = Nothing
    , LLGlob.alignment = 0
    , LLGlob.garbageCollectorName = Nothing
    , LLGlob.prefix = Nothing
    , LLGlob.basicBlocks = bs
    , LLGlob.personalityFunction = Nothing
    , LLGlob.metadata = []
    }

simpleGlobVar :: Name -> Type -> LLConst.Constant -> Global
simpleGlobVar name t = simpleGlobVar' name t . Just

simpleGlobVar' :: Name -> Type -> Maybe LLConst.Constant -> Global
simpleGlobVar' name t init = GlobalVariable
    { LLGlob.name = name
    , LLGlob.linkage = LLLink.External
    , LLGlob.visibility = LLVis.Default
    , LLGlob.dllStorageClass = Nothing
    , LLGlob.threadLocalMode = Nothing
    , LLGlob.addrSpace = LLAddr.AddrSpace 0
    , LLGlob.unnamedAddr = Nothing
    , LLGlob.isConstant = True
    , LLGlob.type' = t
    , LLGlob.initializer = init
    , LLGlob.section = Nothing
    , LLGlob.comdat = Nothing
    , LLGlob.alignment = 0
    , LLGlob.metadata = []
    }

getVar :: Val -> Gen Operand
getVar = \case
    VVar x -> pure x
    VLocal x -> genStackAllocated' x
    VLocal x -> genStackAllocated x

getLocal :: Val -> Gen Operand
getLocal = \case


@@ 725,7 524,7 @@ withLocals = flip (foldr (uncurry withLocal))
--   the environment with the variable
withLocal :: TypedVar -> Operand -> Gen a -> Gen a
withLocal x v gen = do
    vPtr <- genVar' x (pure v)
    vPtr <- genStackAllocated v
    withVar x vPtr gen

withVars :: [(TypedVar, Operand)] -> Gen a -> Gen a


@@ 741,23 540,78 @@ withVal x v ga = do
    var <- getVar v
    withVar x var ga

genVar :: Name -> Gen Operand -> Gen Operand
genVar n gen = genStackAllocated n =<< gen

genVar' :: TypedVar -> Gen Operand -> Gen Operand
genVar' (TypedVar x _) gen = do
    n <- newName x
    ptr <- genVar n gen
genStackAllocated :: Operand -> Gen Operand
genStackAllocated v = do
    ptr <- emitAnon (alloca (typeOf v))
    emit (store v ptr)
    pure ptr

genStackAllocated' :: Operand -> Gen Operand
genStackAllocated' v = flip genStackAllocated v =<< newAnonRegister
genType :: MonoAst.Type -> Gen Type
genType = lift . genType'

genStackAllocated :: Name -> Operand -> Gen Operand
genStackAllocated n v = do
    ptr <- emitReg n (alloca (typeOf v))
    emit (store v ptr)
    pure ptr
-- | Convert to the LLVM representation of a type in an expression-context.
genType' :: MonoAst.Type -> Gen' Type
genType' = \case
    TPrim tc -> pure $ case tc of
        TUnit -> typeUnit
        TNat8 -> i8
        TNat16 -> i16
        TNat32 -> i32
        TNat -> i64
        TInt8 -> i8
        TInt16 -> i16
        TInt32 -> i32
        TInt -> i64
        TDouble -> double
        TChar -> i32
        TBool -> typeBool
    TFun a r -> genClosureType a r
    TBox t -> fmap LLType.ptr (genType' t)
    TConst t -> pure (genDatatypeRef t)

-- | A `Fun` is a closure, and follows a certain calling convention
--
--   A closure is represented as a pair where the first element is the pointer
--   to the structure of captures, and the second element is a pointer to the
--   actual function, which takes as first parameter the captures-pointer, and
--   as second parameter the argument.
--
--   An argument of a structure-type is passed by reference, to be compatible
--   with the C calling convention.
genClosureType :: MonoAst.Type -> MonoAst.Type -> Gen' Type
genClosureType a r = genClosureFunType a r
    <&> \c -> typeStruct [LLType.ptr typeUnit, LLType.ptr c]

-- The type of the function itself within the closure
genClosureFunType :: MonoAst.Type -> MonoAst.Type -> Gen' Type
genClosureFunType a r = do
    a' <- genType' a
    r' <- genType' r
    passArgByRef <- passByRef' a'
    let a'' = if passArgByRef then LLType.ptr a' else a'
    returnResultByRef <- passByRef' r'
    pure $ if returnResultByRef
        then FunctionType
            { resultType = LLType.void
            , argumentTypes = [LLType.ptr r', LLType.ptr typeUnit, a'']
            , isVarArg = False
            }
        else FunctionType
            { resultType = r'
            , argumentTypes = [LLType.ptr typeUnit, a'']
            , isVarArg = False
            }

genCapturesType :: [TypedVar] -> Gen Type
genCapturesType = fmap typeStruct . mapM (\(TypedVar _ t) -> genType t)

genDatatypeRef :: MonoAst.TConst -> Type
genDatatypeRef = NamedTypeReference . mkName . mangleTConst

genVariantType :: Span -> [MonoAst.Type] -> Gen' Type
genVariantType totVariants =
    fmap (typeStruct . maybe id ((:) . IntegerType) (tagBitWidth totVariants))
        . mapM genType'

emit :: Instruction -> Gen ()
emit instr = emit' (Do instr)


@@ 804,10 658,7 @@ newName' s = fmap (mkName . (s ++) . show) (registerCount <<+= 1)
-- TODO: Shouldn't need a return type parameter. Should look at global list of
--       hidden builtins or something.
callExtern :: String -> Type -> [Operand] -> FunInstruction
callExtern f rt as = WithRetType (callExtern'' f rt as) rt

callExtern'' :: String -> Type -> [Operand] -> Instruction
callExtern'' f rt as = Call
callExtern f rt as = flip WithRetType rt $ Call
    { tailCallKind = Nothing
    , callingConvention = cfg_callConv
    , returnAttributes = []


@@ 860,7 711,7 @@ extractvalue struct is = fmap
            $ "extractvalue: index out of bounds: "
            ++ (show (typeOf struct) ++ ", " ++ show is)
    getMembers = \case
        NamedTypeReference x -> getMembers =<< lift (lookupDataType x)
        NamedTypeReference x -> getMembers =<< lift (lookupDatatype x)
        StructureType _ members -> pure members
        t ->
            ice


@@ 897,46 748,42 @@ phi = \case
alloca :: Type -> FunInstruction
alloca t = WithRetType (Alloca t Nothing 0 []) (LLType.ptr t)

litU64' :: Word64 -> Operand
litU64' = ConstantOperand . litU64
litI64 :: Int -> Operand
litI64 = ConstantOperand . litI64'

litU64 :: Word64 -> LLConst.Constant
litU64 = litI64 . fromIntegral
litI64' :: Int -> LLConst.Constant
litI64' = LLConst.Int 64 . toInteger

litI64 :: Int -> LLConst.Constant
litI64 = LLConst.Int 64 . toInteger
litI32 :: Int -> Operand
litI32 = ConstantOperand . LLConst.Int 32 . toInteger

litI32 :: Int -> LLConst.Constant
litI32 = LLConst.Int 32 . toInteger
litI8' :: Integral n => n -> LLConst.Constant
litI8' = LLConst.Int 8 . toInteger

litI8 :: Integral n => n -> LLConst.Constant
litI8 = LLConst.Int 8 . toInteger
litBool :: Bool -> Operand
litBool b = ConstantOperand $ LLConst.Int 8 $ if b then 1 else 0

litBool :: Bool -> LLConst.Constant
litBool b = LLConst.Int 8 $ if b then 1 else 0
litDouble :: Double -> Operand
litDouble = ConstantOperand . LLConst.Float . LLFloat.Double

litDouble :: Double -> LLConst.Constant
litDouble = LLConst.Float . LLFloat.Double
litStruct :: [LLConst.Constant] -> Operand
litStruct = ConstantOperand . litStruct'

litStruct :: [LLConst.Constant] -> LLConst.Constant
litStruct = LLConst.Struct Nothing False
litStruct' :: [LLConst.Constant] -> LLConst.Constant
litStruct' = LLConst.Struct Nothing False

-- Seems like just setting the type-field doesn't always do it. Sometimes the
-- named type is just left off? Happened when generating a string. Add a bitcast
-- for safe measure.
litStructOfType :: TConst -> [LLConst.Constant] -> LLConst.Constant
litStructOfType t xs =
-- NOTE: typeOf Struct does not return NamedTypeReference of the structName, so
--       sometimes, an expression created from this will have the wrong
--       type. Specifically, I have observed this behaviour i phi-nodes. To
--       guard against it (until fixed upstream, hopefully), store the value in
--       a variable beforehand.
litStructNamed' :: TConst -> [LLConst.Constant] -> LLConst.Constant
litStructNamed' t xs =
    let tname = mkName (mangleTConst t) in LLConst.Struct (Just tname) False xs

litUnit :: LLConst.Constant
litUnit :: Operand
litUnit = litStruct []

typeCaptures :: [TypedVar] -> Gen Type
typeCaptures = fmap typeStruct . mapM (\(TypedVar _ t) -> toLlvmType t)

typeNamed :: String -> Type
typeNamed = NamedTypeReference . mkName

typeStruct :: [Type] -> Type
typeStruct ts = StructureType { isPacked = False, elementTypes = ts }



@@ 982,97 829,8 @@ mangleType = \case
mangleTConst :: TConst -> String
mangleTConst (c, ts) = c ++ mangleInst ts

passByRef :: Type -> Gen Bool
passByRef = lift . passByRef'

-- NOTE: This post is helpful:
--       https://stackoverflow.com/questions/42411819/c-on-x86-64-when-are-structs-classes-passed-and-returned-in-registers
--       Also, official docs:
--       https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
--       particularly section 3.2.3 Parameter Passing (p18).
passByRef' :: Type -> Gen' Bool
passByRef' = \case
    NamedTypeReference x -> passByRef' =<< views dataTypes (Map.! x)
    -- Simple scalar types. They go in registers.
    VoidType -> pure False
    IntegerType _ -> pure False
    PointerType _ _ -> pure False
    FloatingPointType _ -> pure False
    -- Functions are not POD (Plain Ol' Data), so they are passed on the stack.
    FunctionType _ _ _ -> pure True
    -- TODO: Investigate how exactly SIMD vectors are to be passed when/if we
    --       ever add support for that in the rest of the compiler.
    VectorType _ _ -> pure True
    -- Aggregate types can either be passed on stack or in regs, depending on
    -- what they contain.
    t@(StructureType _ us) -> do
        size <- sizeof t
        if size > 16 then pure True else fmap or (mapM passByRef' us)
    ArrayType _ u -> do
        size <- sizeof u
        if size > 16 then pure True else passByRef' u
    -- N/A
    MetadataType -> ice "passByRef of MetadataType"
    LabelType -> ice "passByRef of LabelType"
    TokenType -> ice "passByRef of TokenType"

-- TODO: Handle packed
--
-- TODO: Handle different data layouts. Check out LLVMs DataLayout class and
--       impl of `getTypeAllocSize`.
--       https://llvm.org/doxygen/classllvm_1_1DataLayout.html
--
-- | Haskell-native implementation of `sizeof`, in contrast to
--   `getTypeAllocSize` of `llvm-hs`.
--
--   The problem with `getTypeAllocSize` is that it requires an `EncodeAST`
--   monad and messy manipulations. Specifically, I had some recursive bindings
--   going on, but to represent them in a monad I needed `mfix`, but `EncodeAST`
--   didn't have `mfix`!
--
--   See the [System V ABI docs](https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf)
--   for more info.
sizeof :: Type -> Gen' Word64
sizeof = \case
    NamedTypeReference x -> sizeof =<< lookupDataType x
    IntegerType bits -> pure (fromIntegral (toBytesCeil bits))
    PointerType _ _ -> pure 8
    FloatingPointType HalfFP -> pure 2
    FloatingPointType FloatFP -> pure 4
    FloatingPointType DoubleFP -> pure 8
    FloatingPointType FP128FP -> pure 16
    FloatingPointType X86_FP80FP -> pure 16
    FloatingPointType PPC_FP128FP -> pure 16
    StructureType _ us -> foldlM addMember 0 us
    VectorType n u -> fmap (fromIntegral n *) (sizeof u)
    ArrayType n u -> fmap (n *) (sizeof u)
    VoidType -> ice "sizeof VoidType"
    FunctionType _ _ _ -> ice "sizeof FunctionType"
    MetadataType -> ice "sizeof MetadataType"
    LabelType -> ice "sizeof LabelType"
    TokenType -> ice "sizeof TokenType"
  where
    toBytesCeil nbits = div (nbits + 7) 8
    addMember accSize u = do
        align <- alignmentof u
        let padding = mod (align - accSize) align
        size <- sizeof u
        pure (accSize + padding + size)

alignmentof :: Type -> Gen' Word64
alignmentof = \case
    NamedTypeReference x -> alignmentof =<< lookupDataType x
    StructureType _ us -> fmap maximum (traverse alignmentof us)
    VectorType _ u -> alignmentof u
    ArrayType _ u -> alignmentof u
    t -> sizeof t

lookupDataType :: Name -> Gen' Type
lookupDataType x = views dataTypes (Map.lookup x) >>= \case
    Just u -> pure u
    Nothing -> ice $ "Undefined datatype " ++ show x

-- TODO: Try out "tailcc" - Tail callable calling convention. It looks like
--       exactly what I want!
cfg_callConv :: LLCallConv.CallingConvention
cfg_callConv = LLCallConv.C
lookupVar :: TypedVar -> Gen Val
lookupVar x = do
    views env (Map.lookup x) >>= \case
        Just var -> pure (VVar var)
        Nothing -> ice $ "Undefined variable " ++ show x

A src/Gen.hs => src/Gen.hs +77 -0
@@ 0,0 1,77 @@
{-# LANGUAGE LambdaCase, TemplateHaskell #-}

module Gen
    ( Gen
    , Gen'
    , Out(..)
    , outBlocks
    , outStrings
    , outFuncs
    , St(..)
    , currentBlockLabel
    , currentBlockInstrs
    , registerCount
    , lambdaParentFunc
    , outerLambdaN
    , Env(..)
    , env
    , dataTypes
    , lookupDatatype
    )
where

import LLVM.AST
import Control.Monad.Writer
import Control.Monad.State
import Control.Monad.Reader
import qualified Data.Map as Map
import Data.Map (Map)
import Control.Lens (makeLenses, views)

import Misc
import MonoAst hiding (Type, Const)


data Env = Env
    -- TODO: Could operands in env be Val instead? I.e., either stack-allocated
    --       or local?
    { _env :: Map TypedVar Operand  -- ^ Environment of stack allocated variables
    , _dataTypes :: Map Name Type
    }
makeLenses ''Env

data St = St
    { _currentBlockLabel :: Name
    , _currentBlockInstrs :: [Named Instruction]
    , _registerCount :: Word
    -- | Keep track of the parent function name so that we can name the
    --   outermost lambdas of a function definition well.
    , _lambdaParentFunc :: Maybe String
    , _outerLambdaN :: Word
    }
makeLenses ''St

type Gen' = StateT St (Reader Env)

-- | The output of generating a function
data Out = Out
    { _outBlocks :: [BasicBlock]
    , _outStrings :: [(Name, String)]
    , _outFuncs :: [(Name, [TypedVar], TypedVar, Expr)]
    }
makeLenses ''Out

type Gen = WriterT Out Gen'


instance Semigroup Out where
    Out bs1 ss1 fs1 <> Out bs2 ss2 fs2 =
        Out (bs1 <> bs2) (ss1 <> ss2) (fs1 <> fs2)
instance Monoid Out where
    mempty = Out [] [] []


lookupDatatype :: Name -> Gen' Type
lookupDatatype x = views dataTypes (Map.lookup x) >>= \case
    Just u -> pure u
    Nothing -> ice $ "Undefined datatype " ++ show x