~jojo/Carth

ref: 66b609789f715872c992ef8e79a58a0653fac0d1 Carth/src/Lex.hs -rw-r--r-- 6.0 KiB
66b60978JoJo Update stackage release to 16.18 1 year, 3 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
{-# LANGUAGE FlexibleContexts, LambdaCase, TupleSections, DataKinds #-}

module Lex where

import Control.Monad
import Control.Monad.Except
import Data.Char (isMark, isPunctuation, isSymbol)
import Data.Functor
import Data.Maybe
import Control.Applicative (liftA2)
import Text.Megaparsec hiding (parse, match)
import Text.Megaparsec.Char hiding (space, space1)
import qualified Text.Megaparsec.Char as Char
import qualified Text.Megaparsec.Char.Lexer as Lexer
import Data.Set (Set)
import qualified Data.Set as Set
import System.FilePath
import System.Directory

import Misc
import SrcPos
import Lexd
import Literate
import EnvVars


type Import = String

-- data Macro = Macro String deriving (Show)

data TopLevel = TImport Import -- | TMacro Macro
    | TTokenTree TokenTree


lex :: FilePath -> IO (Either String [TokenTree])
lex filepath = do
    modPaths <- modulePaths
    runExceptT (lexModules modPaths Set.empty [filepath])

lexModules :: [FilePath] -> Set String -> [String] -> ExceptT String IO [TokenTree]
lexModules modPaths visiteds = \case
    [] -> pure []
    f : nexts | Set.member f visiteds -> lexModules modPaths visiteds nexts
    f : nexts -> do
        s <- lift (readFile f)
            <&> \s' -> if takeExtension f == ".org" then untangleOrg s' else s'
        (imps, tts) <- liftEither $ parse' toplevels f s
        let ps = takeDirectory f : modPaths
        let resolve m = do
                let gs = [ p </> addExtension m e | p <- ps, e <- [".carth", ".org"] ]
                gs' <- filterM (lift . doesFileExist) gs
                case listToMaybe gs' of
                    Nothing ->
                        throwError
                            $ ("Error: No file for module " ++ m ++ " exists.\n")
                            ++ ("Searched paths: " ++ show ps)
                    Just g' -> pure g'
        impFs <- mapM resolve imps
        ttsNexts <- lexModules modPaths (Set.insert f visiteds) (impFs ++ nexts)
        pure (tts ++ ttsNexts)

toplevels :: Parser ([Import], [TokenTree])
toplevels = do
    space
    tops <- many toplevel
    eof
    pure $ foldr
        (\top (is, tts) -> case top of
            TImport i -> (i : is, tts)
            TTokenTree tt -> (is, tt : tts)
        )
        ([], [])
        tops

toplevel :: Parser TopLevel
toplevel = getSrcPos >>= \p -> parens
    (fmap TImport import' <|> fmap (TTokenTree . WithPos p . Parens) (many tokentree))
    where import' = keyword' "import" *> small

tokentree :: Parser TokenTree
tokentree = withPos tokentree'
  where
    tokentree' = choice
        [ fmap Small smallSpecial
        , fmap Big bigSpecial
        , fmap Keyword (try keyword)
        , fmap Small smallNormal
        , fmap Big bigNormal
        , fmap Lit lit
        , fmap Parens (parens (many tokentree))
        , fmap Brackets (brackets (many tokentree))
        , fmap Braces (braces (many tokentree))
        ]
    lit = try num <|> fmap Str strlit
    num = andSkipSpaceAfter ns_num
    ns_num = do
        neg <- option False (char '-' $> True)
        a <- eitherP (try (ns_nat <* notFollowedBy (char '.'))) Lexer.float
        pure $ either ((\n -> Int (if neg then -n else n)) . fromIntegral)
                      (\x -> F64 (if neg then -x else x))
                      a
    -- ns_int = option id (char '-' $> negate) >>= \f -> fmap (f . fromIntegral) ns_nat
    ns_nat :: Parser Word
    ns_nat = choice
        [string "0b" *> Lexer.binary, string "0x" *> Lexer.hexadecimal, Lexer.decimal]

strlit :: Parser String
strlit = andSkipSpaceAfter ns_strlit
    where ns_strlit = char '"' >> manyTill Lexer.charLiteral (char '"')

keyword :: Parser Keyword
keyword = andSkipSpaceAfter $ choice
    [ string ":" $> Kcolon
    , string "." $> Kdot
    , string "Fun" $> KFun
    , string "Box" $> KBox
    , string "define" $> Kdefine
    , string "define:" $> KdefineColon
    , string "extern" $> Kextern
    , string "forall" $> Kforall
    , string "fmatch" $> Kfmatch
    , string "match" $> Kmatch
    , string "if" $> Kif
    , string "fun" $> Kfun
    , string "let1" $> Klet1
    , string "let" $> Klet
    , string "letrec" $> Kletrec
    , string "data" $> Kdata
    , string "sizeof" $> Ksizeof
    , string "import" $> Kimport
    , string "case" $> Kcase
    , string "id@" $> KidAt
    , string "Id@" $> KIdAt
    ]

keyword' :: String -> Parser ()
keyword' x = andSkipSpaceAfter $ label ("keyword " ++ x) (string x) $> ()

small, smallSpecial, smallNormal :: Parser String
small = smallSpecial <|> smallNormal
smallSpecial = keyword' "id@" *> strlit
smallNormal = andSkipSpaceAfter $ liftA2 (:) smallStart identRest
  where
    smallStart = lowerChar <|> otherChar <|> try (oneOf "-+" <* notFollowedBy digitChar)

big, bigSpecial, bigNormal :: Parser String
big = bigSpecial <|> bigNormal
bigSpecial = keyword' "id@" *> strlit
bigNormal = andSkipSpaceAfter $ liftA2 (:) bigStart identRest
    where bigStart = upperChar <|> char ':'

identRest :: Parser String
identRest = many identLetter
    where identLetter = letterChar <|> otherChar <|> oneOf "-+:" <|> digitChar

otherChar :: Parser Char
otherChar = satisfy
    (\c -> and
        [ any ($ c) [isMark, isPunctuation, isSymbol]
        , not (elem c "()[]{}")
        , not (elem c "\"-+:")
        ]
    )

parens, ns_parens :: Parser a -> Parser a
parens = andSkipSpaceAfter . ns_parens
ns_parens = between (symbol "(") (string ")")

brackets, ns_brackets :: Parser a -> Parser a
brackets = andSkipSpaceAfter . ns_brackets
ns_brackets = between (symbol "[") (string "]")

braces, ns_braces :: Parser a -> Parser a
braces = andSkipSpaceAfter . ns_braces
ns_braces = between (symbol "{") (string "}")

andSkipSpaceAfter :: Parser a -> Parser a
andSkipSpaceAfter = Lexer.lexeme space

symbol :: String -> Parser String
symbol = Lexer.symbol space

-- | Spaces, line comments, and block comments
space :: Parser ()
space = Lexer.space Char.space1 (Lexer.skipLineComment ";") empty

withPos :: Parser a -> Parser (WithPos a)
withPos = liftA2 WithPos getSrcPos

getSrcPos :: Parser SrcPos
getSrcPos = fmap
    (\(SourcePos f l c) -> SrcPos f (fromIntegral (unPos l)) (fromIntegral (unPos c)))
    getSourcePos