~jojo/Carth

8cd053f1ad4e3ef36b9c362df90529c32fd6e54f — JoJo 1 year, 11 months ago 964845a
Fix codegen of strings

The type of the GlobalReference was wrong before, also we've removed
TStr in favor of a datatype built on `TPtr Nat8`.

Also, add a system for adding builtin datatypes that doesn't require
any ugly hacks with dummy-positions and stuff.
4 files changed, 57 insertions(+), 22 deletions(-)

M src/Ast.hs
M src/Check.hs
M src/Codegen.hs
M src/Parse.hs
M src/Ast.hs => src/Ast.hs +0 -2
@@ 59,7 59,6 @@ data TPrim
    | TInt
    | TDouble
    | TChar
    | TStr
    | TBool
    deriving (Show, Eq, Ord)



@@ 339,7 338,6 @@ prettyTPrim = \case
    TInt -> "Int"
    TDouble -> "Double"
    TChar -> "Char"
    TStr -> "Str"
    TBool -> "Bool"

prettyTVar :: TVar -> String

M src/Check.hs => src/Check.hs +37 -5
@@ 12,6 12,7 @@ import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Either.Combinators
import Data.Bifunctor
import Data.Foldable
import Data.Graph (SCC(..), flattenSCC, stronglyConnComp)
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)


@@ 129,15 130,17 @@ checkTypeDefs
           , Map String (VariantIx, (String, [TVar]), [Type], Span)
           )
checkTypeDefs =
    (fmap (second (fmap snd)) .)
        $ flip foldM (Map.empty, Map.empty)
    flip foldM (builtinDataTypes, builtinConstructors)
        $ \(tds', csAcc) td@(Ast.TypeDef x _ _) -> do
            when (Map.member (idstr x) tds') (throwError (ConflictingTypeDef x))
            (td', cs) <- checkTypeDef td
            case listToMaybe (Map.elems (Map.intersection csAcc cs)) of
            case listToMaybe (Map.elems (Map.intersection cs csAcc)) of
                Just (cId, _) -> throwError (ConflictingCtorDef cId)
                Nothing ->
                    pure (uncurry Map.insert td' tds', Map.union cs csAcc)
                    pure
                        ( uncurry Map.insert td' tds'
                        , Map.union (fmap snd cs) csAcc
                        )

checkTypeDef
    :: Ast.TypeDef


@@ 162,6 165,35 @@ checkTypeDef (Ast.TypeDef x' ps (Ast.ConstructorDefs cs)) = do
        (zip [0 ..] cs)
    pure ((x, (ps', cs')), cs''')

builtinDataTypes :: Map String ([TVar], [(String, [Type])])
builtinDataTypes =
    Map.fromList (map (\(x, ps, cs) -> (x, (ps, cs))) builtinDataTypes')

builtinConstructors :: Map String (VariantIx, (String, [TVar]), [Type], Span)
builtinConstructors = Map.unions (map builtinConstructors' builtinDataTypes')

builtinConstructors'
    :: (String, [TVar], [(String, [Type])])
    -> Map String (VariantIx, (String, [TVar]), [Type], Span)
builtinConstructors' (x, ps, cs) =
    let cSpan = length cs
    in
        foldl'
            (\csAcc (i, (cx, cps)) ->
                Map.insert cx (i, (x, ps), cps, cSpan) csAcc
            )
            Map.empty
            (zip [0 ..] cs)

builtinDataTypes' :: [(String, [TVar], [(String, [Type])])]
builtinDataTypes' =
    [ ( "Array"
      , [TVImplicit 0]
      , [("Array", [TPtr (TVar (TVImplicit 0)), TPrim TNat])]
      )
    , ("Str", [], [("Str", [TConst ("Array", [TPrim TNat8])])])
    ]

inferDefs :: [Ast.Def] -> Infer Defs
inferDefs defs = do
    let ordered = orderDefs defs


@@ 357,7 389,7 @@ litType = \case
    Int _ -> TPrim TInt
    Double _ -> TPrim TDouble
    Char _ -> TPrim TChar
    Str _ -> TPrim TStr
    Str _ -> TConst ("Str", [])
    Bool _ -> TPrim TBool

lookupEnv :: Id Small -> Infer Type

M src/Codegen.hs => src/Codegen.hs +20 -14
@@ 80,7 80,7 @@ type Gen' = StateT St (ReaderT Env EncodeAST)
-- | The output of generating a function
data Out = Out
    { _outBlocks :: [BasicBlock]
    , _outStrings :: [(Name, String)]
    , _outStrings :: [(Name, Word64, [Word8])]
    , _outFuncs :: [(Name, [TypedVar], TypedVar, Expr)]
    }
makeLenses ''Out


@@ 292,7 292,6 @@ toLlvmType = \case
        TInt -> i64
        TDouble -> double
        TChar -> i32
        TStr -> LLType.ptr i8
        TBool -> typeBool
    TFun a r -> typeStruct
        [ LLType.ptr typeUnit


@@ 309,8 308,17 @@ genConst = \case
    Char c -> pure $ litI32 (Data.Char.ord c)
    Str s -> do
        var <- newName "strlit"
        scribe outStrings [(var, s)]
        pure $ LLConst.GlobalReference (LLType.ptr i8) var
        let bytes = UTF8.String.encode s
        let len = fromIntegral (length bytes)
        let t = ArrayType len i8
        scribe outStrings [(var, len, bytes)]
        let llArrayVal = LLConst.GlobalReference (LLType.ptr t) var
        let ptrVal = LLConst.BitCast llArrayVal (LLType.ptr i8)
        let arrayVal = litStructOfType
                ("Array", [TPrim TNat8])
                [litI64 0, ptrVal, litU64 len]
        let strVal = litStructOfType ("Str", []) [litI64 0, arrayVal]
        pure strVal
    Bool b -> pure $ litBool b

lookupVar :: TypedVar -> Gen Operand


@@ 477,15 485,10 @@ semiExecRetGen gx = runWriterT $ do
    x <- gx
    commitFinalFuncBlock (ret x)
    pure (typeOf x)
globStrVar :: (Name, Word64, [Word8]) -> Global
globStrVar (name, len, bytes) =
    simpleGlobVar name (ArrayType len i8) (LLConst.Array i8 (map litI8 bytes))

globStrVar :: (Name, String) -> Global
globStrVar (name, str) =
    let bytes = UTF8.String.encode str
    in
        simpleGlobVar
            name
            (ArrayType (fromIntegral (length bytes)) i8)
            (LLConst.Array i8 (map (litI8) bytes))

simpleFunc :: Name -> [Parameter] -> Type -> [BasicBlock] -> Global
simpleFunc = ($ []) .** simpleFunc'


@@ 518,7 521,10 @@ simpleFunc' n ps rt fnAttrs bs = Function
    }

simpleGlobVar :: Name -> Type -> LLConst.Constant -> Global
simpleGlobVar name t init = GlobalVariable
simpleGlobVar name t = simpleGlobVar' name t . Just

simpleGlobVar' :: Name -> Type -> Maybe LLConst.Constant -> Global
simpleGlobVar' name t init = GlobalVariable
    { LLGlob.name = name
    , LLGlob.linkage = LLLink.External
    , LLGlob.visibility = LLVis.Default


@@ 528,7 534,7 @@ simpleGlobVar name t init = GlobalVariable
    , LLGlob.unnamedAddr = Nothing
    , LLGlob.isConstant = True
    , LLGlob.type' = t
    , LLGlob.initializer = Just init
    , LLGlob.initializer = init
    , LLGlob.section = Nothing
    , LLGlob.comdat = Nothing
    , LLGlob.alignment = 0

M src/Parse.hs => src/Parse.hs +0 -1
@@ 275,7 275,6 @@ ns_tprim = try $ do
        "Int" -> pure TInt
        "Double" -> pure TDouble
        "Char" -> pure TChar
        "Str" -> pure TStr
        "Bool" -> pure TBool
        _ -> fail $ "Undefined type constant " ++ s