~jojo/Carth

ref: 749208029494a7c48ce04444a513424f4b998416 Carth/src/Parser.hs -rw-r--r-- 4.4 KiB
74920802JoJo update TODO 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
{-# LANGUAGE DataKinds #-}

module Parser where

import Control.Applicative hiding (many, some)
import Control.Monad
import Control.Monad.State.Strict
import Control.Monad.Except
import Data.Functor
import Data.List
import Data.Set (Set)
import qualified Data.Set as Set

import Misc
import SrcPos
import Lexd
import Parsed
import Pretty

data Err = Err
    { errLength :: Word
    , errPos :: SrcPos
    , errExpecteds :: Set String
    }
    deriving (Show, Eq)

instance Semigroup Err where
    (<>) e1 e2
        | e2 == mempty = e1
        | errLength e1 > errLength e2 = e1
        | errLength e1 < errLength e2 = e2
        | otherwise = Err (errLength e2)
                          (errPos e2)
                          (Set.union (errExpecteds e1) (errExpecteds e2))

instance Monoid Err where
    mempty = Err 0 (SrcPos "<dummy>" 0 0 Nothing) Set.empty

data St = St
    { stCount :: Word
    , stOuterPos :: SrcPos
    , stInput :: [TokenTree]
    }

newtype Parser a = Parser (StateT St (Except Err) a)
    deriving (Functor, Applicative, MonadPlus, Monad, MonadError Err, MonadState St)

instance Alternative Parser where
    empty = Parser (throwError mempty)
    (<|>) ma mb = do
        n <- gets stCount
        catchError ma $ \e -> if errLength e > n
            then throwError e
            else catchError mb (throwError . (e <>))

runParser' :: Parser a -> [TokenTree] -> Except (SrcPos, String) a
runParser' ma = runParser ma (ice "read SrcPos in parser state at top level")

runParser :: Parser a -> SrcPos -> [TokenTree] -> Except (SrcPos, String) a
runParser (Parser ma) surroundingPos tts =
    let initSt = St 0 surroundingPos tts
        formatExpecteds es = case Set.toList es of
            [] -> ice "empty list of expecteds in formatExpecteds"
            [e] -> "Expected " ++ e
            es' -> "Expected one of: " ++ intercalate ", " es'
    in  withExcept (\(Err _ pos es) -> (pos, formatExpecteds es)) (evalStateT ma initSt)

token :: String -> (SrcPos -> TokenTree' -> Maybe a) -> Parser a
token exp f = do
    St n outerPos xs <- get
    case xs of
        WithPos innerPos x : xs' -> do
            a <- mexcept (Err n innerPos (Set.singleton exp)) (f innerPos x)
            modify (\st -> st { stCount = n + 1, stInput = xs' })
            pure a
        [] -> throwError
            $ Err n outerPos (Set.singleton "continuation of token sequence")

-- | Succeeds only when current input sequence (may be nested in sexpr) is empty
end :: Parser ()
end = get >>= \(St n _ inp) -> case inp of
    [] -> pure ()
    WithPos p _ : _ ->
        throwError (Err n p (Set.singleton "end of (nested) token sequence"))

mexcept :: MonadError e m => e -> Maybe a -> m a
mexcept e = maybe (throwError e) pure

lookAhead :: Parser a -> Parser a
lookAhead pa = get >>= \s -> pa >>= \a -> put s $> a

try :: Parser a -> Parser a
try ma = do
    s <- get
    catchError ma $ \e -> do
        put s
        throwError (e { errLength = stCount s })

getSrcPos :: Parser SrcPos
getSrcPos = lookAhead (token "token" (Just .* const))

anyToken :: Parser TokenTree
anyToken = token "any token" (Just .* WithPos)

withPos :: Parser a -> Parser (WithPos a)
withPos = liftA2 WithPos getSrcPos

parens :: Parser a -> Parser a
parens ma = parens' (const ma)

parens' :: (SrcPos -> Parser a) -> Parser a
parens' = sexpr "`(`" $ \case
    Parens tts -> Just tts
    _ -> Nothing

brackets :: Parser a -> Parser a
brackets ma = brackets' (const ma)

brackets' :: (SrcPos -> Parser a) -> Parser a
brackets' = sexpr "`[`" $ \case
    Brackets tts -> Just tts
    _ -> Nothing

sexpr :: String -> (TokenTree' -> Maybe [TokenTree]) -> (SrcPos -> Parser a) -> Parser a
sexpr expected extract f = do
    (p, tts) <- token expected $ \p' -> fmap (p', ) . extract
    St _ pOld ttsOld <- get
    modify (\st -> st { stOuterPos = p, stInput = tts })
    a <- f p
    end
    modify (\st -> st { stOuterPos = pOld, stInput = ttsOld })
    pure a

big' :: Parser String
big' = fmap idstr big

big :: Parser (Id 'Parsed.Big)
big = token "big identifier" $ \p -> \case
    Lexd.Big x -> Just (Id (WithPos p x))
    _ -> Nothing

small' :: Parser String
small' = fmap idstr small

small :: Parser (Id 'Parsed.Small)
small = token "small identifier" $ \p -> \case
    Lexd.Small x -> Just (Id (WithPos p x))
    _ -> Nothing

reserved :: Keyword -> Parser ()
reserved k = token ("keyword " ++ pretty k) $ const $ \case
    Keyword k' | k == k' -> Just ()
    _ -> Nothing