~thon/thon

thon/parse.sml -rw-r--r-- 9.7 KiB
2c9c65d6Evan Bergeron Parse SUCC | LET | CASE | FN 3 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
structure NewParse : PARSE =
struct

exception UnexpectedToken of string
exception Unimplemented of string

fun parse s = A.Zero

fun incr i = (i := !i + 1)

fun println s = print (s  ^ "\n")

fun debugPrint s =
    if false then println s
    else ()

fun errMsg (expectedToken, actualToken) =
    ("Expected " ^ (Lex.tokenToString expectedToken) ^
     ", got " ^ (Lex.tokenToString actualToken) ^ "\n")

fun expect tokens (token : Lex.Token) i =
    if List.nth (tokens, !i) <> token then
        (print (errMsg(token,  List.nth (tokens, !i)));
         raise UnexpectedToken(errMsg(token,  List.nth (tokens, !i))))
    else (i := !i + 1)

fun lookahead tokens i =
    if ((!i)+1) > ((List.length tokens) - 1)
    then NONE
    else SOME (List.nth (tokens, ((!i)+1)))

fun consumeName tokens i =
    let val res  =
            (case List.nth (tokens, !i) of
                 Lex.NAME n => n
               | tok =>

                 (print(errMsg((Lex.NAME "some name"), tok));
                  raise UnexpectedToken(errMsg((Lex.NAME "some name"), tok))))
    in i := (!i) + 1;
       res end

fun consumeNewlines tokens i =
    if (!i) >= (List.length tokens) then () else
    case List.nth (tokens, !i) of
        Lex.NEWLINE => (incr(i); consumeNewlines tokens i)
      | _ => ()

fun parseType tokens i =
    let val this =
            (case List.nth (tokens, !i) of
                 Lex.NAT => (i := (!i) + 1; A.Nat)
               | Lex.NAME name => (i := (!i) + 1; A.TypVar(name, ~1))
               | Lex.UNIT => (i := (!i) + 1; A.Unit)
               | _ => raise Unimplemented("See token that is not nat or name in type"))
    in
        case List.nth (tokens, !i) of
            Lex.SARROW => (incr(i); A.Arr(this, (parseType tokens i)))
          | Lex.STAR => (incr(i); A.Prod(this, (parseType tokens i)))
          | _ => this
    end


(* Parses between parens - expect LPAREN in caller before calling and RPAREN after *)
fun parseFuncCallParams tokens i =
    let
        val arg = parseExpr tokens i
    in
        (* TODO eventually got clean up all these unguarded array accesses *)
        (* Also UNDONE handle more than two func call params *)
        case List.nth (tokens, !i) of
            Lex.COMMA =>
            let
                val () = expect tokens Lex.COMMA i
                val arg2 = parseExpr tokens i
            in
                A.Pair(arg, arg2)
            end
           | Lex.RPAREN => arg
           | tok => raise UnexpectedToken("expected func param or LPAREN, got " ^
                                          (Lex.tokenToString tok))
    end


and parseExpr tokens i =
    (if (!i) >= (List.length tokens) then A.TmUnit else
     (case List.nth (tokens, !i) of
          Lex.FUN => (
           let
               val () = expect tokens Lex.FUN i
               val funcName = consumeName tokens i
               val () = debugPrint (funcName ^ " begin")
               val () = expect tokens Lex.LPAREN i
               (* TODO multiple params - should implement n-nary products first *)
               val argName = consumeName tokens i
               val argType = parseType tokens i
               val () = expect tokens Lex.RPAREN i
               val retType = parseType tokens i
               val funcType = A.Arr(argType, retType)
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val () = debugPrint (funcName ^ " indent")
               val body = parseExpr tokens i
               val () = debugPrint (funcName ^ " end of body")
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val () = debugPrint (funcName ^ " dedent")
               val () = consumeNewlines tokens i
               val () = debugPrint (funcName ^ " afterwards")
           in
               if (!i) < (List.length tokens) andalso
                  List.nth (tokens, (!i))  = Lex.DEDENT then
                   (debugPrint (funcName ^ "see dedent next");
                    (* TODO double check these semantics. If there's a
                      dedent after this funciton definition, then this is
                      the last chunk of the parent block and so the value
                      of the parent block should be this function? If so,
                      will need to replicate this logic across every
                      other construct. *)
                    A.Let(funcName, funcType,
                          A.Fix(funcName, funcType,
                                A.Fn(argName, argType, body)), A.Var(funcName, ~1)))
               else
                   let
                       val rest = parseExpr tokens i
                   in
                       A.Let(funcName, funcType,
                             A.Fix(funcName, funcType,
                                   A.Fn(argName, argType, body)), rest)
                   end
           end
        )
        | Lex.DATA => (
           let
               val () = expect tokens Lex.DATA i
               val datatypeName = consumeName tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val fstTypeCtorName = consumeName tokens i
               val fstTypeCtorType = parseType tokens i
               val () = consumeNewlines tokens i
               val sndTypeCtorName = consumeName tokens i
               val sndTypeCtorType = parseType tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val () = consumeNewlines tokens i
           in
               (* TODO handle dedent again case *)
                let
                    val rest = parseExpr tokens i
                in
                    A.Data(datatypeName,
                           fstTypeCtorName, fstTypeCtorType,
                           sndTypeCtorName, sndTypeCtorType,
                           rest)
                end
           end
        )
        | Lex.UNIT => (incr(i); A.TmUnit)
        | Lex.ZERO => (incr(i); A.Zero)
        | Lex.FN => (
            let
                val () = expect tokens Lex.FN i
                val () = expect tokens Lex.LPAREN i
                (* TODO multiple params - should implement n-nary products first *)
                val argName = consumeName tokens i
                val argType = parseType tokens i
                val () = expect tokens Lex.RPAREN i
                val () = expect tokens Lex.DARROW i
                val body = parseExpr tokens i
                val () = consumeNewlines tokens i
            in
                A.Fn(argName, argType, body)
            end)
        | Lex.LET => (
            let
                val () = expect tokens Lex.LET i
                val varName = consumeName tokens i
                val varType = parseType tokens i
                val () = expect tokens Lex.EQ i
                val varExpr = parseExpr tokens i
                val () = consumeNewlines tokens i
                (* TODO last of block *)
                val rest = parseExpr tokens i
            in
                A.Let(varName, varType, varExpr, rest)
            end
        )
        | Lex.CASE => (
           let
               val () = expect tokens Lex.CASE i
               val caseExpr = parseExpr tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val fstCaseVarName = consumeName tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val fstCaseExpr = parseExpr tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val sndCaseVarName = consumeName tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val sndCaseExpr = parseExpr tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
           in
               A.Case(caseExpr, fstCaseVarName, fstCaseExpr, sndCaseVarName, sndCaseExpr)
            end
        )
        | Lex.NAME name =>
          (case lookahead tokens i of
               SOME Lex.LPAREN => (
                (* Function application *)
                let val funcName = consumeName tokens i
                    val () = expect tokens Lex.LPAREN i
                    val arg = parseFuncCallParams tokens i
                    val () = expect tokens Lex.RPAREN i
                in
                    A.App(A.Var(funcName, ~1), arg)
                end
            )
             | _ => let val () = expect tokens (Lex.NAME name) i
                    in
                        A.Var (name, ~1)
                    end
          )
        | Lex.SUCC =>
          (case lookahead tokens i of
               SOME Lex.LPAREN => (
                (* Succ application *)
                let val () = expect tokens Lex.SUCC i
                    val () = expect tokens Lex.LPAREN i
                    val arg = parseExpr tokens i
                    val () = expect tokens Lex.RPAREN i
                in
                    A.Succ(arg)
                end
            )
            | SOME tok => raise UnexpectedToken("Expected ( after s, got " ^ (Lex.tokenToString tok))
            | NONE => raise UnexpectedToken("Unexpected EOF after s")
          )
        | tok => (raise UnexpectedToken("Got unexpected " ^ (Lex.tokenToString tok))))
    )

fun parseFile filename =
    let val tokens = Lex.lexFile filename
        val i = ref 0;
    in
        parseExpr tokens i
    end
    handle UnexpectedToken msg => (print ("Parsing error: " ^ msg ^ "\n");
                                   raise (UnexpectedToken msg) )


end