~jojo/Carth

ref: cbb5eacfb2b0879db8e51348ae16be4c8b7654cf Carth/src/Codegen.hs -rw-r--r-- 16.4 KiB
cbb5eacfJoJo Fix regression where var bindings in let didn't have others in env 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
{-# LANGUAGE OverloadedStrings, LambdaCase, TupleSections, FlexibleContexts #-}

-- | Generation of LLVM IR code from our monomorphic AST.
module Codegen (codegen) where

import LLVM.AST hiding (args)
import LLVM.AST.Typed
import LLVM.AST.Type hiding (ptr)
import LLVM.AST.DataLayout
import qualified LLSubprog
import qualified LLCompunit
import qualified LLVM.AST.Operand as LLOp
import qualified LLVM.AST.Type as LLType
import qualified LLVM.AST.Constant as LLConst
import Data.String
import System.FilePath
import Control.Monad.Writer
import qualified Data.Map as Map
import Data.Map (Map)
import qualified Data.Set as Set
import Data.Word
import Data.Maybe
import Data.Foldable
import Data.List
import Data.Function
import Data.Functor
import Data.Bifunctor
import Control.Applicative
import Lens.Micro.Platform (use, assign)

import Misc
import SrcPos
import FreeVars
import qualified Monomorphic
import Monomorphic hiding (Type, Const)
import Selections
import Gen
import Extern


codegen :: DataLayout -> FilePath -> Program -> Module
codegen layout moduleFilePath (Program (Topo defs) tdefs externs) =
    let
        (tdefs', externs', globDefs) = runGen' $ do
            (enums, tdefs'') <- defineDataTypes tdefs
            augment enumTypes enums
                $ augment dataTypes tdefs''
                $ withExternSigs externs
                $ withGlobDefSigs (map (second unpos) defs)
                $ do
                    es <- genExterns externs
                    ds <- liftA2 (:) genMain (fmap join (mapM genGlobDef defs))
                    pure (tdefs'', es, ds)
    in
        Module
            { moduleName = fromString ((takeBaseName moduleFilePath))
            , moduleSourceFileName = fromString moduleFilePath
            , moduleDataLayout = Just layout
            , moduleTargetTriple = Nothing
            , moduleDefinitions = concat
                [ map
                    (\(n, tmax) -> TypeDefinition n (Just (typeStruct tmax)))
                    (Map.toList tdefs')
                , genBuiltins
                , externs'
                , globDefs
                , globMetadataDefs
                ]
            }
  where
    withGlobDefSigs sigs ga = do
        sigs' <- forM sigs $ \(v@(TypedVar x t), (us, _)) -> do
            t' <- genType' t
            pure
                ( v
                , ConstantOperand $ LLConst.GlobalReference
                    (LLType.ptr t')
                    (mkName (mangleName (x, us)))
                )
        augment env (Map.fromList sigs') ga
    fileId = MetadataNodeID 1
    debugInfoVersionId = MetadataNodeID 2
    globMetadataDefs =
        [ MetadataNodeDefinition compileUnitId
            $ DINode (LLOp.DIScope (LLOp.DICompileUnit compileUnitDef))
        , MetadataNodeDefinition fileId
            $ DINode (LLOp.DIScope (LLOp.DIFile fileDef))
        , MetadataNodeDefinition debugInfoVersionId $ MDTuple
            [ Just (MDValue (litI32 2))
            , Just (MDString "Debug Info Version")
            , Just (MDValue (litI32 3))
            ]
        , NamedMetadataDefinition "llvm.dbg.cu" [compileUnitId]
        , NamedMetadataDefinition "llvm.module.flags" [debugInfoVersionId]
        ]
    compileUnitDef = LLCompunit.CompileUnit
        { LLCompunit.language =
            let unstandardized_c = 1 in unstandardized_c
        , LLCompunit.file = MDRef fileId
        , LLCompunit.producer = "carth version alpha"
        , LLCompunit.optimized = False
        , LLCompunit.flags = ""
        , LLCompunit.runtimeVersion = 0
        , LLCompunit.splitDebugFileName = ""
        , LLCompunit.emissionKind = LLOp.FullDebug
        , LLCompunit.enums = []
        , LLCompunit.retainedTypes = []
        , LLCompunit.globals = []
        , LLCompunit.imports = []
        , LLCompunit.macros = []
        , LLCompunit.dWOId = 0
        , LLCompunit.splitDebugInlining = False
        , LLCompunit.debugInfoForProfiling = False
        , LLCompunit.nameTableKind = LLOp.NameTableKindNone
        , LLCompunit.debugBaseAddress = False
        }
    fileDef =
        let (dir, file) = splitFileName moduleFilePath
        in
            LLOp.File
                { LLSubprog.filename = fromString file
                , LLSubprog.directory = fromString dir
                , LLSubprog.checksum = Nothing
                }

-- | A data-type is a tagged union, and we represent it in LLVM as a struct
--   where, if there are more than 1 variant, the first element is the
--   variant-index. The variant index is represented as an integer with the
--   smallest width 2^n that can fit all variants. The rest of the elements are
--   the field-types of the largest variant wrt allocation size.
--
--   If none of the variants of the data-type has any members, we say it's an
--   enumeration, which is represented as a single integer, equal to the size it
--   would have been as a tag. If further there's only a single variant, the
--   data-type is represented as `{}`.
defineDataTypes :: TypeDefs -> Gen' (Map Name Word32, Map Name [Type])
defineDataTypes tds = do
    let (enums, datas) = partition (all null . snd) tds
    let enums' = Map.fromList $ map
            (\(tc, vs) ->
                ( mkName (mangleTConst tc)
                , fromMaybe 0 (tagBitWidth (fromIntegral (length vs)))
                )
            )
            enums
    datas'' <- mfix $ \datas' ->
        fmap Map.fromList
            $ augment enumTypes enums'
            $ augment dataTypes datas'
            $ forM datas
            $ \(tc, vs) -> do
                let n = mkName (mangleTConst tc)
                let totVariants = fromIntegral (length vs)
                ts <- mapM (genVariantType totVariants) vs
                sizedTs <- mapM (\t -> sizeof (typeStruct t) <&> (, t)) ts
                if null sizedTs
                    then ice ("defineDataTypes: sizedTs empty for " ++ show n)
                    else pure (n, snd (maximum sizedTs))
    pure (enums', datas'')

genMain :: Gen' Definition
genMain = do
    assign currentBlockLabel (mkName "entry")
    assign currentBlockInstrs []
    Out basicBlocks _ _ _ <- execWriterT $ do
        emitDo' (callExtern "install_stackoverflow_handler" [])
        f <- lookupVar (TypedVar "main" mainType)
        _ <- app f (VLocal litUnit)
        commitFinalFuncBlock (ret (litI32 0))
    pure (GlobalDefinition (simpleFunc (mkName "main") [] i32 basicBlocks []))

-- TODO: Change global defs to a new type that can be generated by llvm. As it
--       is now, global non-function variables can't be straight-forwardly
--       generated in general. Either, initialization is delayed until program
--       start, or an interpretation step is added between monomorphization and
--       codegen that evaluates all expressions in relevant contexts, like
--       constexprs.
genGlobDef
    :: (TypedVar, WithPos ([Monomorphic.Type], Expr)) -> Gen' [Definition]
genGlobDef (TypedVar v _, WithPos dpos (ts, (Expr _ e))) = case e of
    Fun p (body, _) -> do
        let var = (v, ts)
        let name = mangleName var
        assign lambdaParentFunc (Just name)
        assign outerLambdaN 1
        let fName = mkName (name ++ "_func")
        (f, gs) <- genFunDef (fName, [], dpos, p, genExpr body)
        let fRef = LLConst.GlobalReference (LLType.ptr (typeOf f)) fName
        let capturesType = LLType.ptr typeUnit
        let captures = LLConst.Null capturesType
        let closure = litStruct [captures, fRef]
        let closureDef = simpleGlobVar (mkName name) (typeOf closure) closure
        pure (GlobalDefinition closureDef : GlobalDefinition f : gs)
    _ -> nyi $ "Global non-function defs: " ++ show e

genExpr :: Expr -> Gen Val
genExpr (Expr pos expr) = locally srcPos (pos <|>) $ do
    parent <- use lambdaParentFunc <* assign lambdaParentFunc Nothing
    case expr of
        Lit c -> genConst c
        Var (TypedVar x t) -> lookupVar (TypedVar x t)
        App f e _ -> genApp f e
        If p c a -> genIf p c a
        Fun p b -> assign lambdaParentFunc parent *> genExprLambda p b
        Let ds b -> genLet ds b
        Match e cs tbody -> genMatch e cs =<< genType tbody
        Ction c -> genCtion c
        Box e -> genBox =<< genExpr e
        Deref e -> genDeref e
        Absurd t -> fmap (VLocal . undef) (genType t)

genExprLambda :: TypedVar -> (Expr, Monomorphic.Type) -> Gen Val
genExprLambda p (b, bt) = do
    let fvXs = Set.toList (Set.delete p (freeVars b))
    bt' <- genType bt
    genLambda fvXs p (genExpr b, bt')

genConst :: Monomorphic.Const -> Gen Val
genConst = \case
    Int n -> pure (VLocal (litI64 n))
    F64 x -> pure (VLocal (litF64 x))
    Str s -> genStrLit s

genStrLit :: String -> Gen Val
genStrLit s = do
    var <- newName "strlit"
    scribe outStrings [(var, s)]
    pure $ VVar $ ConstantOperand
        (LLConst.GlobalReference (LLType.ptr typeStr) var)

-- | Beta-reduction and closure application
genApp :: Expr -> Expr -> Gen Val
genApp fe' ae' = genApp' (fe', [ae'])
  where
    -- TODO: Could/should the beta-reduction maybe happen in an earlier stage,
    --       like when desugaring?
    genApp' = \case
        (Expr _ (Fun p (b, _)), ae : aes) -> do
            a <- genExpr ae
            withVal p a (genApp' (b, aes))
        (Expr _ (App fe ae _), aes) -> genApp' (fe, ae : aes)
        (fe, []) -> genExpr fe
        (fe, aes) -> do
            closure <- genExpr fe
            as <- mapM genExpr aes
            foldlM (\f a -> app f a) closure as

app :: Val -> Val -> Gen Val
app closure a = do
    closure' <- getLocal closure
    captures <- emitReg "captures" =<< extractvalue closure' [0]
    f <- emitReg "function" =<< extractvalue closure' [1]
    a' <- getLocal a
    let args = [(captures, []), (a', [])]
    fmap VLocal (emitAnonReg (call f args))
  where
    call f as =
        WithRetType (callVoid f as) (getFunRet (getPointee (typeOf f)))

genIf :: Expr -> Expr -> Expr -> Gen Val
genIf pred' conseq alt = do
    predV <- genExpr pred'
    genCondBr predV (genExpr conseq) (genExpr alt)

genCondBr :: Val -> Gen Val -> Gen Val -> Gen Val
genCondBr predV genConseq genAlt = do
    predV' <- emitAnonReg . flip trunc i1 =<< getLocal predV
    conseqL <- newName "consequent"
    altL <- newName "alternative"
    nextL <- newName "next"
    commitToNewBlock (condbr predV' conseqL altL) conseqL
    conseqV <- getLocal =<< genConseq
    fromConseqL <- use currentBlockLabel
    commitToNewBlock (br nextL) altL
    altV <- getLocal =<< genAlt
    fromAltL <- use currentBlockLabel
    commitToNewBlock (br nextL) nextL
    fmap VLocal (emitAnonReg (phi [(conseqV, fromConseqL), (altV, fromAltL)]))

genLet :: Defs -> Expr -> Gen Val
genLet (Topo ds) letBody = do
    -- For both function and variable bindings, we need separate the definition
    -- into two passes, where the first pre-allocates some stuff.
    (binds, cs) <- fmap unzip $ forM ds $ \case
        (v, WithPos _ (_, Expr _ (Fun p (fb, fbt)))) -> do
            let fvXs = Set.toList (Set.delete p (freeVars fb))
            tcaptures <- fmap
                typeStruct
                (mapM (\(TypedVar _ t) -> genType t) fvXs)
            captures <- genHeapAllocGeneric tcaptures
            fbt' <- genType fbt
            l <-
                getVar
                    =<< genLambda' p (genExpr fb, fbt') (VLocal captures) fvXs
            pure ((v, l), Left (captures, fvXs))
        (v@(TypedVar n t), WithPos _ (_, e)) -> do
            t' <- genType t
            mem <- emitReg n (alloca t')
            pure ((v, mem), Right e)
    withVars binds $ do
        forM_ (zip binds cs) $ \case
            (_, Left (captures, fvXs)) -> populateCaptures captures fvXs
            ((_, mem), Right e) -> do
                x <- getLocal =<< genExpr e
                emitDo (store x mem)
        genExpr letBody

genMatch :: Expr -> DecisionTree -> Type -> Gen Val
genMatch m dt tbody = do
    -- TODO: Do we have to convert it to an operand here already? Keeping it as
    --       Val would probably eliminate a needless stack allocation.
    m' <- getLocal =<< genExpr m
    genDecisionTree tbody dt (newSelections m')

genDecisionTree :: Type -> DecisionTree -> Selections Operand -> Gen Val
genDecisionTree tbody = \case
    Monomorphic.DLeaf l -> genDecisionLeaf l
    Monomorphic.DSwitch selector cs def ->
        genDecisionSwitchIx selector cs def tbody
    Monomorphic.DSwitchStr selector cs def ->
        genDecisionSwitchStr selector cs def tbody

genDecisionSwitchIx
    :: Monomorphic.Access
    -> Map VariantIx DecisionTree
    -> DecisionTree
    -> Type
    -> Selections Operand
    -> Gen Val
genDecisionSwitchIx selector cs def tbody selections = do
    let (variantIxs, variantDts) = unzip (Map.toAscList cs)
    variantLs <- mapM (newName . (++ "_") . ("variant_" ++) . show) variantIxs
    defaultL <- newName "default"
    nextL <- newName "next"
    (m, selections') <- select selAs selSub selDeref selector selections
    mVariantIx <- case typeOf m of
        IntegerType _ -> pure m
        _ -> emitReg "found_variant_ix" =<< extractvalue m [0]
    let ixBits = getIntBitWidth (typeOf mVariantIx)
    let litIxInt = LLConst.Int ixBits
    let dests' = zip (map litIxInt variantIxs) variantLs
    commitToNewBlock (switch mVariantIx defaultL dests') defaultL
    let genDecisionTree' dt = do
            u <- genDecisionTree tbody dt selections'
            liftA2 (,) (getLocal u) (use currentBlockLabel)
    v <- genDecisionTree' def
    let genCase l dt = do
            commitToNewBlock (br nextL) l
            genDecisionTree' dt
    vs <- zipWithM genCase variantLs variantDts
    commitToNewBlock (br nextL) nextL
    fmap VLocal (emitAnonReg (phi (v : vs)))

genDecisionSwitchStr
    :: Monomorphic.Access
    -> Map String DecisionTree
    -> DecisionTree
    -> Type
    -> Selections Operand
    -> Gen Val
genDecisionSwitchStr selector cs def tbody selections = do
    (matchee, selections') <- select selAs selSub selDeref selector selections
    let cs' = Map.toAscList cs
    let genCase :: (String, DecisionTree) -> Gen Val -> Gen (Gen Val)
        genCase (s, dt) next = do
            s' <- genStrLit s
            isMatch <- genStrEq (VLocal matchee) s'
            -- Do some wrapping to preserve effect/Gen order
            pure (genCondBr isMatch (genDT dt) next)
        genDT dt = genDecisionTree tbody dt selections'
    f <- foldrM genCase (genDT def) cs'
    f

genDecisionLeaf
    :: (Monomorphic.VarBindings, Expr) -> Selections Operand -> Gen Val
genDecisionLeaf (bs, e) selections = do
    bs' <- selectVarBindings selAs selSub selDeref selections bs
    withLocals bs' (genExpr e)

selAs :: Span -> [Monomorphic.Type] -> Operand -> Gen Operand
selAs totVariants ts matchee = do
    tvariant <- fmap typeStruct (lift (genVariantType totVariants ts))
    let tgeneric = typeOf matchee
    pGeneric <- emitReg "ction_ptr_nominal" (alloca tgeneric)
    emitDo (store matchee pGeneric)
    p <- emitReg "ction_ptr_structural" (bitcast pGeneric (LLType.ptr tvariant))
    emitReg "ction" (load p)

selSub :: Span -> Word32 -> Operand -> Gen Operand
selSub span' i matchee =
    let tagOffset = if span' > 1 then 1 else 0
    in emitReg "submatchee" =<< extractvalue matchee (pure (tagOffset + i))

selDeref :: Operand -> Gen Operand
selDeref x = emitAnonReg (load x)

genCtion :: Monomorphic.Ction -> Gen Val
genCtion (i, span', dataType, as) = do
    lookupEnum dataType & lift >>= \case
        Just 0 -> pure (VLocal litUnit)
        Just w -> pure (VLocal (ConstantOperand (LLConst.Int w i)))
        Nothing -> do
            as' <- mapM genExpr as
            let tag = maybe
                    id
                    ((:) . VLocal . ConstantOperand . flip LLConst.Int i)
                    (tagBitWidth span')
            s <- getLocal =<< genStruct (tag as')
            let t = typeOf s
            let tgeneric = genDatatypeRef dataType
            pGeneric <- emitReg "ction_ptr_nominal" (alloca tgeneric)
            p <- emitReg
                "ction_ptr_structural"
                (bitcast pGeneric (LLType.ptr t))
            emitDo (store s p)
            pure (VVar pGeneric)

genBox :: Val -> Gen Val
genBox = fmap fst . genBox'

genBox' :: Val -> Gen (Val, Val)
genBox' x = do
    let t = typeOf x
    ptrGeneric <- genHeapAllocGeneric t
    ptr <- emitAnonReg (bitcast ptrGeneric (LLType.ptr t))
    x' <- getLocal x
    emitDo (store x' ptr)
    pure (VLocal ptr, VLocal ptrGeneric)

genDeref :: Expr -> Gen Val
genDeref e = genExpr e >>= \case
    VVar x -> fmap VVar (selDeref x)
    VLocal x -> pure (VVar x)

genStrEq :: Val -> Val -> Gen Val
genStrEq s1 s2 = do
    s1' <- getLocal s1
    s2' <- getLocal s2
    b <- emitAnonReg (callExtern "carth_str_eq" [s1', s2'])
    pure (VLocal b)