~thon/thon

d04d68b582842f99e579ba2ef2a1229e8482fb24 — Evan Bergeron 2 months ago 4637d04
Parse and lex data decl, two param func calls, prod types, unit
4 files changed, 125 insertions(+), 51 deletions(-)

A examples/isemptynew.thon
M lex.sml
M parse.sml
M test.sml
A examples/isemptynew.thon => examples/isemptynew.thon +5 -0
@@ 0,0 1,5 @@
data list:
    nil unit
    cons nat * list

cons(z, nil(unit))

M lex.sml => lex.sml +11 -4
@@ 1,12 1,12 @@
structure Lex : sig
datatype Token = FUN | FN | NAT | COLON | LPAREN | RPAREN | NAME of string | INDENT | DEDENT | RETURN | ZERO | SUCC | LET | SARROW | EQUAL | DARROW | IF | THEN | ELSE | DATA | BAR | CASE | COMMA | NEWLINE
datatype Token = FUN | FN | NAT | COLON | LPAREN | RPAREN | NAME of string | INDENT | DEDENT | RETURN | ZERO | SUCC | LET | SARROW | EQUAL | DARROW | IF | THEN | ELSE | DATA | BAR | CASE | COMMA | NEWLINE | UNIT | STAR
val lexFile : string -> Token list
val lexFileNoPrintErrMsg : string -> Token list
val tokenToString : Token -> string
end  =
struct

datatype Token = FUN | FN | NAT | COLON | LPAREN | RPAREN | NAME of string | INDENT | DEDENT | RETURN | ZERO | SUCC | LET | SARROW | EQUAL | DARROW | IF | THEN | ELSE | DATA | BAR | CASE | COMMA | NEWLINE
datatype Token = FUN | FN | NAT | COLON | LPAREN | RPAREN | NAME of string | INDENT | DEDENT | RETURN | ZERO | SUCC | LET | SARROW | EQUAL | DARROW | IF | THEN | ELSE | DATA | BAR | CASE | COMMA | NEWLINE | UNIT | STAR

exception No
exception UnexpectedIndentLevel


@@ 43,6 43,8 @@ fun tokenToString FUN = "FUN"
  | tokenToString CASE = "CASE"
  | tokenToString COMMA = "COMMA"
  | tokenToString NEWLINE = "NEWLINE"
  | tokenToString UNIT = "UNIT"
  | tokenToString STAR = "STAR"

fun lookaheadN s n =
    (* Can raise Size *)


@@ 170,6 172,11 @@ and lexLines' s out indentLevel =
        ) else (
            raise UnexpectedToken("saw `=`, expected `=>` or `=`")
        )
      | "*" => (
            eatWord "*" s;
            lexLines' s (STAR::out) indentLevel
          )
      | "u" => eatKeywordOrName ("unit", UNIT) s indentLevel out
      | "i" => eatKeywordOrName ("if", IF) s indentLevel out
      | "t" => eatKeywordOrName ("then", THEN) s indentLevel out
      | "e" => eatKeywordOrName ("else", ELSE) s indentLevel out


@@ 215,7 222,7 @@ and lexLines' s out indentLevel =
      )
      | other =>
        if not (Char.isAlpha (String.sub (other, 0))) then
            raise UnexpectedToken("indentifiers must start with alphabetic characters")
            raise UnexpectedToken("See identifier starting with " ^ other ^ ". Indentifiers must start with alphabetic characters.")
        else
            let val name = getName s in
            eatWord name s;


@@ 233,7 240,7 @@ fun lex s printErrMsg =
    in
        forewards
    end
    handle UnexpectedToken msg => (if printErrMsg then print ("Syntax error: " ^ msg ^ "\n") else ();
    handle UnexpectedToken msg => (if printErrMsg then print ("Lexing error: " ^ msg ^ "\n") else ();
                                   raise (UnexpectedToken msg) )

fun lexFile filename = lex (TextIO.openIn filename) true

M parse.sml => parse.sml +104 -47
@@ 2,7 2,7 @@ structure NewParse : PARSE =
struct

exception UnexpectedToken of string
exception Unimplemented
exception Unimplemented of string

fun parse s = A.Zero



@@ 50,59 50,113 @@ fun parseType tokens i =
    let val this =
            (case List.nth (tokens, !i) of
                 Lex.NAT => (i := (!i) + 1; A.Nat)
               | _ => raise Unimplemented)
               | Lex.NAME name => (i := (!i) + 1; A.TypVar(name, ~1))
               | Lex.UNIT => (i := (!i) + 1; A.Unit)
               | _ => raise Unimplemented("See token that is not nat or name in type"))
    in
        (case List.nth (tokens, !i) of
             Lex.SARROW => (incr(i); A.Arr(this, (parseType tokens i))) | _ => this)
        case List.nth (tokens, !i) of
            Lex.SARROW => (incr(i); A.Arr(this, (parseType tokens i)))
          | Lex.STAR => (incr(i); A.Prod(this, (parseType tokens i)))
          | _ => this
    end

fun parseExpr tokens i =

(* Parses between parens - expect LPAREN in caller before calling and RPAREN after *)
fun parseFuncCallParams tokens i =
    let
        val arg = parseExpr tokens i
    in
        (* TODO eventually got clean up all these unguarded array accesses *)
        (* Also UNDONE handle more than two func call params *)
        case List.nth (tokens, !i) of
            Lex.COMMA =>
            let
                val () = expect tokens Lex.COMMA i
                val arg2 = parseExpr tokens i
            in
                A.Pair(arg, arg2)
            end
           | Lex.RPAREN => arg
           | tok => raise UnexpectedToken("expected func param or LPAREN, got " ^
                                          (Lex.tokenToString tok))
    end


and parseExpr tokens i =
    (if (!i) >= (List.length tokens) then A.TmUnit else
     (case List.nth (tokens, !i) of
          Lex.FUN =>
          let
              val () = expect tokens Lex.FUN i
              val funcName = consumeName tokens i
              val () = debugPrint (funcName ^ " begin")
              val () = expect tokens Lex.LPAREN i
              (* TODO multiple params - should implement n-nary products first *)
              val argName = consumeName tokens i
              val argType = parseType tokens i
              val () = expect tokens Lex.RPAREN i
              val retType = parseType tokens i
              val funcType = A.Arr(argType, retType)
              val () = consumeNewlines tokens i
              val () = expect tokens Lex.INDENT i
              val () = debugPrint (funcName ^ " indent")
              val body = parseExpr tokens i
              val () = debugPrint (funcName ^ " end of body")
              val () = consumeNewlines tokens i
              val () = expect tokens Lex.DEDENT i
              val () = debugPrint (funcName ^ " dedent")
              val () = consumeNewlines tokens i
              val () = debugPrint (funcName ^ " afterwards")
          in
              if (!i) < (List.length tokens) andalso
                 List.nth (tokens, (!i))  = Lex.DEDENT then
                  (debugPrint (funcName ^ "see dedent next");
                   (* TODO double check these semantics. If there's a
          Lex.FUN => (
           let
               val () = expect tokens Lex.FUN i
               val funcName = consumeName tokens i
               val () = debugPrint (funcName ^ " begin")
               val () = expect tokens Lex.LPAREN i
               (* TODO multiple params - should implement n-nary products first *)
               val argName = consumeName tokens i
               val argType = parseType tokens i
               val () = expect tokens Lex.RPAREN i
               val retType = parseType tokens i
               val funcType = A.Arr(argType, retType)
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val () = debugPrint (funcName ^ " indent")
               val body = parseExpr tokens i
               val () = debugPrint (funcName ^ " end of body")
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val () = debugPrint (funcName ^ " dedent")
               val () = consumeNewlines tokens i
               val () = debugPrint (funcName ^ " afterwards")
           in
               if (!i) < (List.length tokens) andalso
                  List.nth (tokens, (!i))  = Lex.DEDENT then
                   (debugPrint (funcName ^ "see dedent next");
                    (* TODO double check these semantics. If there's a
                      dedent after this funciton definition, then this is
                      the last chunk of the parent block and so the value
                      of the parent block should be this function? If so,
                      will need to replicate this logic across every
                      other construct. *)
                   A.Let(funcName, funcType,
                         A.Fix(funcName, funcType,
                               A.Fn(argName, argType, body)), A.Var(funcName, ~1)))
              else
                  let
                      val rest = parseExpr tokens i
                  in
                      A.Let(funcName, funcType,
                            A.Fix(funcName, funcType,
                                  A.Fn(argName, argType, body)), rest)
                  end
          end
                    A.Let(funcName, funcType,
                          A.Fix(funcName, funcType,
                                A.Fn(argName, argType, body)), A.Var(funcName, ~1)))
               else
                   let
                       val rest = parseExpr tokens i
                   in
                       A.Let(funcName, funcType,
                             A.Fix(funcName, funcType,
                                   A.Fn(argName, argType, body)), rest)
                   end
           end
        )
        | Lex.DATA => (
           let
               val () = expect tokens Lex.DATA i
               val datatypeName = consumeName tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.INDENT i
               val fstTypeCtorName = consumeName tokens i
               val fstTypeCtorType = parseType tokens i
               val () = consumeNewlines tokens i
               val sndTypeCtorName = consumeName tokens i
               val sndTypeCtorType = parseType tokens i
               val () = consumeNewlines tokens i
               val () = expect tokens Lex.DEDENT i
               val () = consumeNewlines tokens i
           in
               (* TODO handle dedent again case *)
                let
                    val rest = parseExpr tokens i
                in
                    A.Data(datatypeName,
                           fstTypeCtorName, fstTypeCtorType,
                           sndTypeCtorName, sndTypeCtorType,
                           rest)
                end
           end
        )
        | Lex.UNIT => (incr(i); A.TmUnit)
        | Lex.ZERO => (incr(i); A.Zero)
        | Lex.NAME name =>
          (case lookahead tokens i of


@@ 111,7 165,7 @@ fun parseExpr tokens i =
                let val funcName = consumeName tokens i
                    val () = expect tokens Lex.LPAREN i
                    (* TODO multiple params *)
                    val arg = parseExpr tokens i
                    val arg = parseFuncCallParams tokens i
                    val () = expect tokens Lex.RPAREN i
                in
                    A.App(A.Var(funcName, ~1), arg)


@@ 123,8 177,7 @@ fun parseExpr tokens i =
                    end
          )

        | tok => (println ("Got unexpected " ^
                           (Lex.tokenToString tok)); raise Unimplemented))
        | tok => (raise UnexpectedToken("Got unexpected " ^ (Lex.tokenToString tok))))
    )

fun parseFile filename =


@@ 133,4 186,8 @@ fun parseFile filename =
    in
        parseExpr tokens i
    end
    handle UnexpectedToken msg => (print ("Parsing error: " ^ msg ^ "\n");
                                   raise (UnexpectedToken msg) )


end

M test.sml => test.sml +5 -0
@@ 601,6 601,11 @@ val [FUN,NAME "foo",LPAREN,NAME "a",NAT,RPAREN,NAT,SARROW,NAT,NEWLINE,INDENT,
  : Lex.Token list =
    Lex.lexFile "/home/evan/thon/examples/lex04.thon";


val Data ("list","nil",Unit,"cons",Prod (Nat,TypVar ("list",0)),
          App (Var ("cons",1),Pair (Zero,App (Var ("nil",2),TmUnit)))) : Ast.exp
    = newParseFile "/home/evan/thon/examples/isemptynew.thon";

in
()
end