~jojo/Carth

9eb4a48cffd05e7ca34f0a2b74fe7baeeb4bc2cb — JoJo 7 months ago e17c3a3
Codegen: Do more at Val & ptr level everywhere. getelemementptr etc.

The website said to "avoid loads and stores of large aggregate type"
to improve performance. This seems like solid advice, so this commit
changes a bunch of different stuff in the codegen to keep structs in
pointers for as long as possible and use getelementptr instead of
extractvalue/insertvalue. Particularly in the pattern matching stuff,
change the selection stuff to operate on Val instead of operand, using
new convenience functions like genIndexStruct which does extractvalue
if the Val is a local, and getelementptr if it's a stack var.

Performance improvement is not obviously noticable for my small
programs, but it seems less code is generated overall. .dbg.ll of
Fizzbuzz decreased from like 1700 to 1500 lines or something. That's
at least a 10% improvement.

https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-loads-and-stores-of-large-aggregate-type
4 files changed, 93 insertions(+), 68 deletions(-)

M TODO.org
M src/Codegen.hs
M src/Gen.hs
M src/Selections.hs
M TODO.org => TODO.org +7 -0
@@ 743,3 743,10 @@ Features and other stuff to do/implement in/around Carth.
  Example use case: We don't want to have to use linear types to
  manually destroy Lazy values when we're done with them, but we still
  need to make sure that their mutexes are destroyed at some point.
* NEXT "Use ptrtoint/inttoptr sparingly, prefer GEPs"
  https://llvm.org/docs/Frontend/PerformanceTips.html#other-things-to-consider

  I don't think I use ptrtoint/inttoptr much or at all in the compiler
  itself, but the ~ptr/+~ function in the stdlib transmutes to int for
  addition. Should add a builtin virtual function that uses gep to
  offset pointer.

M src/Codegen.hs => src/Codegen.hs +29 -42
@@ 178,8 178,8 @@ genMain = do
    Out basicBlocks _ _ _ <- execWriterT $ do
        emitDo' =<< callBuiltin "install_stackoverflow_handler" []
        emitDo (callIntern Nothing init_ [(null' typeGenericPtr, []), (litUnit, [])])
        iof <- getLocal =<< lookupVar (TypedVar "main" mainType)
        f <- fmap VLocal $ emitAnonReg =<< extractvalue iof [0]
        iof <- lookupVar (TypedVar "main" mainType)
        f <- genIndexStruct iof [0]
        _ <- app (Just NoTail) f (VLocal litRealWorld)
        commitFinalFuncBlock (ret (litI32 0))
    pure (GlobalDefinition (externFunc (mkName "main") [] i32 basicBlocks []))


@@ 383,47 383,39 @@ genLet' def genBody = case def of

genTailMatch :: Expr -> DecisionTree -> Type -> Gen ()
genTailMatch m dt tbody = do
    m' <- getLocal =<< genExpr m
    m' <- genExpr m
    genTailDecisionTree tbody dt (newSelections m')

genMatch :: Expr -> DecisionTree -> Type -> Gen Val
genMatch m dt tbody = do
    -- TODO: Do we have to convert it to an operand here already? Keeping it as
    --       Val would probably eliminate a needless stack allocation.
    m' <- getLocal =<< genExpr m
    m' <- genExpr m
    genDecisionTree tbody dt (newSelections m')

genTailDecisionTree :: Type -> DecisionTree -> Selections Operand -> Gen ()
genTailDecisionTree :: Type -> DecisionTree -> Selections Val -> Gen ()
genTailDecisionTree = genDecisionTree' genTailExpr genTailCondBr genTailCases

genDecisionTree :: Type -> DecisionTree -> Selections Operand -> Gen Val
genDecisionTree :: Type -> DecisionTree -> Selections Val -> Gen Val
genDecisionTree = genDecisionTree' genExpr genCondBr genCases

genDecisionTree'
    :: (Expr -> Gen a)
    -> (Val -> Gen a -> Gen a -> Gen a)
    -> ( Type
       -> Selections Operand
       -> [Name]
       -> [DecisionTree]
       -> DecisionTree
       -> Gen a
       )
    -> (Type -> Selections Val -> [Name] -> [DecisionTree] -> DecisionTree -> Gen a)
    -> Type
    -> DecisionTree
    -> Selections Operand
    -> Selections Val
    -> Gen a
genDecisionTree' genExpr' genCondBr' genCases' tbody =
    let genDecisionLeaf (bs, e) selections = do
            bs' <- selectVarBindings selAs selSub selDeref selections bs
            withLocals bs' (genExpr' e)
            bs' <- selectVarBindings selAs selSub genDeref selections bs
            withVals bs' (genExpr' e)

        genDecisionSwitchIx selector cs def selections = do
            let (variantIxs, variantDts) = unzip (Map.toAscList cs)
            (m, selections') <- select selAs selSub selDeref selector selections
            mVariantIx <- case typeOf m of
            (m, selections') <- select selAs selSub genDeref selector selections
            mVariantIx <- getLocal =<< case typeOf m of
                IntegerType _ -> pure m
                _ -> emitReg "found_variant_ix" =<< extractvalue m [0]
                _ -> genIndexStruct m [0]
            let ixBits = getIntBitWidth (typeOf mVariantIx)
            let litIxInt = LLConst.Int ixBits
            variantLs <- mapM (newName . (++ "_") . ("variant_" ++) . show) variantIxs


@@ 433,12 425,12 @@ genDecisionTree' genExpr' genCondBr' genCases' tbody =
            genCases' tbody selections' variantLs variantDts def

        genDecisionSwitchStr selector cs def selections = do
            (matchee, selections') <- select selAs selSub selDeref selector selections
            (matchee, selections') <- select selAs selSub genDeref selector selections
            let cs' = Map.toAscList cs
            let genCase (s, dt) next = do
                    s' <- genStrLit s
                    isMatch <- genStrEq (VLocal matchee) s'
                    -- Do some wrapping to preserve effect/Gen order
                    isMatch <- genStrEq matchee s'
                    -- Do some wrapping to preserve effect order
                    pure $ genCondBr' isMatch (genDT dt selections') next
            join (foldrM genCase (genDT def selections') cs')



@@ 449,15 441,14 @@ genDecisionTree' genExpr' genCondBr' genCases' tbody =
    in  genDT

genTailCases
    :: Type -> Selections Operand -> [Name] -> [DecisionTree] -> DecisionTree -> Gen ()
    :: Type -> Selections Val -> [Name] -> [DecisionTree] -> DecisionTree -> Gen ()
genTailCases tbody selections variantLs variantDts def = do
    genTailDecisionTree tbody def selections
    forM_ (zip variantLs variantDts) $ \(l, dt) -> do
        assign currentBlockLabel l
        genTailDecisionTree tbody dt selections

genCases
    :: Type -> Selections Operand -> [Name] -> [DecisionTree] -> DecisionTree -> Gen Val
genCases :: Type -> Selections Val -> [Name] -> [DecisionTree] -> DecisionTree -> Gen Val
genCases tbody selections variantLs variantDts def = do
    nextL <- newName "next"
    let genDT dt = liftA2 (,)


@@ 471,19 462,15 @@ genCases tbody selections variantLs variantDts def = do
    commitToNewBlock (br nextL) nextL
    fmap VLocal (emitAnonReg (phi (v : vs)))

selAs :: Span -> [Ast.Type] -> Operand -> Gen Operand
selAs :: Span -> [Ast.Type] -> Val -> Gen Val
selAs totVariants ts matchee = do
    tvariant <- fmap typeStruct (lift (genVariantType totVariants ts))
    let tgeneric = typeOf matchee
    pGeneric <- emitReg "ction_ptr_nominal" (alloca tgeneric)
    emitDo (store matchee pGeneric)
    p <- emitReg "ction_ptr_structural" (bitcast pGeneric (LLType.ptr tvariant))
    emitReg "ction" (load p)
    pGeneric <- getVar matchee
    fmap VVar (emitReg "ction_ptr_structural" (bitcast pGeneric (LLType.ptr tvariant)))

selSub :: Span -> Word32 -> Operand -> Gen Operand
selSub :: Span -> Word32 -> Val -> Gen Val
selSub span' i matchee =
    let tagOffset = if span' > 1 then 1 else 0
    in  emitReg "submatchee" =<< extractvalue matchee (pure (tagOffset + i))
    let tagOffset = if span' > 1 then 1 else 0 in genIndexStruct matchee [tagOffset + i]

genCtion :: Ast.Ction -> Gen Val
genCtion (i, span', dataType, as) = do


@@ 492,15 479,15 @@ genCtion (i, span', dataType, as) = do
        Just w -> pure (VLocal (ConstantOperand (LLConst.Int w i)))
        Nothing -> do
            as' <- mapM genExpr as
            let tag = maybe id
                            ((:) . VLocal . ConstantOperand . flip LLConst.Int i)
                            (tagBitWidth span')
            s <- getLocal =<< genStruct (tag as')
            let t = typeOf s
            let tagged = maybe
                    as'
                    ((: as') . VLocal . ConstantOperand . flip LLConst.Int i)
                    (tagBitWidth span')
            let t = typeStruct (map typeOf tagged)
            let tgeneric = genDatatypeRef dataType
            pGeneric <- emitReg "ction_ptr_nominal" (alloca tgeneric)
            p <- emitReg "ction_ptr_structural" (bitcast pGeneric (LLType.ptr t))
            emitDo (store s p)
            genStructInPtr p tagged
            pure (VVar pGeneric)

genStrEq :: Val -> Val -> Gen Val

M src/Gen.hs => src/Gen.hs +55 -24
@@ 132,12 132,12 @@ genFunDef (name, fvs, dpos, ptv@(TypedVar px pt), genBody) = do
        -- Two equal SrcPos's in different scopes are not equal at the
        -- metadata level. Reset cache every scope.
        assign srcPosToMetadata Map.empty
        (capturesParam, captureLocals) <- genExtractCaptures
        (capturesParam, captureMembers) <- genExtractCaptures
        pt' <- genType pt
        px' <- newName px
        let pRef = LocalReference pt' px'
        rt' <- locallySet srcPos (Just dpos)
            $ withLocal ptv pRef (withLocals captureLocals genBody)
            $ withLocal ptv pRef (withVals captureMembers genBody)
        let fParams' = [uncurry Parameter capturesParam [], Parameter pt' px' []]
        pure (rt', fParams')
    (funScopeMdId, funScopeMdDef) <- defineFunScopeMetadata


@@ 163,6 163,8 @@ genFunDef (name, fvs, dpos, ptv@(TypedVar px pt), genBody) = do
            str = litStructNamed ("Str", []) [array]
            defStr = simpleGlobConst strName typeStr str
        pure (map GlobalDefinition [defInner, defStr])

    genExtractCaptures :: Gen ((Type, Name), [(TypedVar, Val)])
    genExtractCaptures = do
        capturesName <- newName "captures"
        let capturesPtrGeneric = LocalReference typeGenericPtr capturesName


@@ 173,11 175,13 @@ genFunDef (name, fvs, dpos, ptv@(TypedVar px pt), genBody) = do
                capturesType <- genCapturesType fvs
                capturesPtr <- emitAnonReg
                    (bitcast capturesPtrGeneric (LLType.ptr capturesType))
                captures <- emitAnonReg (load capturesPtr)
                captureVals <- mapM
                    (\(TypedVar x _, i) -> emitReg x =<< extractvalue captures [i])
                    (\(TypedVar x _, i) ->
                        VVar <$> (emitReg x =<< getelementptr capturesPtr (litI64 0) [i])
                    )
                    (zip fvs [0 ..])
                pure (zip fvs captureVals)

    defineSrcPos funScopeMdRef (SrcPos _ line col _, mdId) = do
        let loc =
                LLOp.DILocation


@@ 267,9 271,10 @@ genLambda fvXs p body = do

populateCaptures :: Operand -> [TypedVar] -> Gen ()
populateCaptures ptrGeneric fvXs = do
    captures <- getLocal =<< genStruct =<< mapM lookupVar fvXs
    ptr <- emitAnonReg (bitcast ptrGeneric (LLType.ptr (typeOf captures)))
    emitDo (store captures ptr)
    vs <- mapM lookupVar fvXs
    let t = typeStruct (map typeOf vs)
    ptr <- emitAnonReg (bitcast ptrGeneric (LLType.ptr t))
    genStructInPtr ptr vs

genLambda' :: TypedVar -> (Gen (), Type) -> Val -> [TypedVar] -> Gen Val
genLambda' p@(TypedVar _ pt) (genBody, bt) captures fvXs = do


@@ 432,6 437,12 @@ genStruct xs = do
                         (undef t)
                         (zip [0 ..] xs')

genStructInPtr :: Operand -> [Val] -> Gen ()
genStructInPtr ptr vs = forM_ (zip [0 ..] vs) $ \(i, v) -> do
    dest <- emitAnonReg =<< getelementptr ptr (litI64 0) [i]
    v' <- getLocal v
    emitDo (store v' dest)

genHeapAllocGeneric :: Type -> Gen Operand
genHeapAllocGeneric t = do
    size <- fmap (litI64 . fromIntegral) (lift (sizeof t))


@@ 608,10 619,6 @@ genAppBuiltinVirtual (TypedVar g t) aes = do
            (FloatingPointType _, IntegerType _) ->
                emit' $ if isInt b then fptosi else fptoui
            _ -> throwError (CastErr pos a b)
    genDeref :: Val -> Gen Val
    genDeref = \case
        VVar x -> fmap VVar (selDeref x)
        VLocal x -> pure (VVar x)
    genStore :: Val -> Val -> Gen Val
    genStore x p = do
        x' <- getLocal x


@@ 658,8 665,10 @@ app tailkind closure a = do
        then emitDo (callIntern tailkind f as) $> litUnit
        else emitAnonReg $ WithRetType (callIntern tailkind f as) rt

selDeref :: Operand -> Gen Operand
selDeref x = emitAnonReg (load x)
genDeref :: Val -> Gen Val
genDeref = \case
    VVar x -> fmap VVar (emitAnonReg (load x))
    VLocal x -> pure (VVar x)

-- | Assumes that the from-type and to-type are of the same size.
transmute :: Type -> Type -> Val -> Gen Val


@@ 1015,20 1024,14 @@ lookupDatatype x = view (enumTypes . to (Map.lookup x)) >>= \case
    Nothing -> fmap (maybe (ice ("Undefined datatype " ++ show x)) typeStruct)
                    (view (dataTypes . to (Map.lookup x)))

genIndexStruct :: Val -> [Word32] -> Gen Val
genIndexStruct v is = case v of
    VLocal st -> fmap VLocal (emitAnonReg =<< extractvalue st is)
    VVar ptr -> fmap VVar (emitAnonReg =<< getelementptr ptr (litI64 0) is)

extractvalue :: Operand -> [Word32] -> Gen FunInstr
extractvalue struct is = fmap (WithRetType (ExtractValue struct is))
                              (getIndexed (typeOf struct) (map fromIntegral is))
  where
    getIndexed = foldlM $ \t i -> getMembers t <&> \us -> if i < length us
        then us !! i
        else
            ice
            $ "extractvalue: index out of bounds: "
            ++ (show (typeOf struct) ++ ", " ++ show is)
    getMembers = \case
        NamedTypeReference x -> getMembers =<< lift (lookupDatatype x)
        StructureType _ members -> pure members
        t -> ice $ "Tried to get member types of non-struct type " ++ show t

undef :: Type -> Operand
undef = ConstantOperand . LLConst.Undef


@@ 1150,6 1153,16 @@ sitofp x t = WithRetType (SIToFP x t) t
insertvalue :: Operand -> Operand -> [Word32] -> FunInstr
insertvalue s e is = WithRetType (InsertValue s e is) (typeOf s)

getelementptr :: Operand -> Operand -> [Word32] -> Gen FunInstr
getelementptr addr offset memberIs = fmap
    (WithRetType $ \meta -> GetElementPtr { inBounds = False
                                          , address = addr
                                          , indices = offset : map litU32 memberIs
                                          , metadata = meta
                                          }
    )
    (fmap LLType.ptr (getIndexed (getPointee (typeOf addr)) (map fromIntegral memberIs)))

store :: Operand -> Operand -> Instr
store srcVal destPtr meta = Store { volatile = False
                                  , address = destPtr


@@ 1190,6 1203,9 @@ litI64' = LLConst.Int 64 . toInteger
litI32 :: Int -> Operand
litI32 = ConstantOperand . LLConst.Int 32 . toInteger

litU32 :: Word32 -> Operand
litU32 = ConstantOperand . LLConst.Int 32 . toInteger

litI8' :: Integral n => n -> LLConst.Constant
litI8' = LLConst.Int 8 . toInteger



@@ 1260,6 1276,21 @@ getPointee = \case
    LLType.PointerType t _ -> t
    t -> ice $ "Tried to get pointee of non-function type " ++ show t

getIndexed :: Type -> [Int] -> Gen Type
getIndexed t is = foldlM
    (\t' i -> getMembers t' <&> \us -> if i < length us
        then us !! i
        else ice $ "getIndexed: index out of bounds: " ++ (show t ++ ", " ++ show is)
    )
    t
    is

getMembers :: Type -> Gen [Type]
getMembers = \case
    NamedTypeReference x -> getMembers =<< lift (lookupDatatype x)
    StructureType _ members -> pure members
    t -> ice $ "Tried to get member types of non-struct type " ++ show t

getIntBitWidth :: Type -> Word32
getIntBitWidth = \case
    LLType.IntegerType w -> w

M src/Selections.hs => src/Selections.hs +2 -2
@@ 18,7 18,7 @@ newSelections :: a -> Selections a
newSelections x = Map.singleton Obj x

select
    :: (Show a, Monad m)
    :: (Monad m)
    => (Span -> [Type] -> a -> m a)
    -> (Span -> Word32 -> a -> m a)
    -> (a -> m a)


@@ 45,7 45,7 @@ select conv sub deref selector selections = case Map.lookup selector selections 
        pure (a, Map.insert selector a selections')

selectVarBindings
    :: (Show a, Monad m)
    :: (Monad m)
    => (Span -> [Type] -> a -> m a)
    -> (Span -> Word32 -> a -> m a)
    -> (a -> m a)