~technomancy/fennel

0317eb778262b4cd156d7712432b135e4804b56b — Phil Hagelberg 11 months ago 9fc5bff
Refactor the remaining parser clauses.

Remove all early returns and breaks from the parser.
2 files changed, 157 insertions(+), 150 deletions(-)

M src/fennel/parser.fnl
M test/core.fnl
M src/fennel/parser.fnl => src/fennel/parser.fnl +156 -150
@@ 87,14 87,14 @@ stream is finished."

  ;; If you add new calls to this function, please update fennel.friend as well
  ;; to add suggestions for how to fix the new error!
  (fn parse-error [msg]
  (fn parse-error [msg byteindex-override]
    (let [{: source : unfriendly} (or utils.root.options {})]
      (utils.root.reset)
      (if unfriendly
          (error (string.format "Parse error in %s:%s: %s" (or filename :unknown)
                                (or line "?") msg) 0)
          (friend.parse-error msg (or filename "unknown") (or line "?")
                              byteindex source))))
                              (or byteindex-override byteindex) source))))

  (fn parse-stream []
    (var (whitespace-since-dispatch done? retval) true)


@@ 118,159 118,165 @@ stream is finished."
      (if (and b (whitespace? b))
          (do (set whitespace-since-dispatch true)
              (skip-whitespace (getb)))
          (and (not b) (> (# stack) 0))
          (badend)
          b))

    (while true ; main parse loop
      (var b (skip-whitespace (getb)))
    (fn skip-comment [b]
      (if (and b (not= 10 b))
          (skip-comment (getb))
          b))

    (fn open-table [b]
      (when (not whitespace-since-dispatch)
        (parse-error (.. "expected whitespace before opening delimiter "
                         (string.char b))))
      (table.insert stack {:bytestart byteindex :closer (. delims b)
                           :filename filename :line line}))

    (fn close-list [list]
      (dispatch (setmetatable list (getmetatable (utils.list)))))

    (fn close-sequence [tbl]
      (let [val (utils.sequence (unpack tbl))]
        ;; for table literals we can store file/line/offset source
        ;; data in fields on the table itself, because the AST node
        ;; *is* the table, and the fields would show up in the
        ;; compiled output. keep them on the metatable instead.
        (each [k v (pairs tbl)]
          (tset (getmetatable val) k v))
        (dispatch val)))

    (fn close-curly-table [tbl]
      (let [val []] ; a {} table
        (when (not= (% (# tbl) 2) 0)
          (set byteindex (- byteindex 1))
          (parse-error "expected even number of values in table literal"))
        (setmetatable val tbl) ; see note above about source data
        (for [i 1 (# tbl) 2]
          (when (and (= (tostring (. tbl i)) ":")
                     (utils.sym? (. tbl (+ i 1)))
                     (utils.sym? (. tbl i)))
            (tset tbl i (tostring (. tbl (+ i 1)))))
          (tset val (. tbl i) (. tbl (+ i 1))))
        (dispatch val)))

    (fn close-table [b]
      (let [top (table.remove stack)]
        (when (= top nil)
          (parse-error (.. "unexpected closing delimiter " (string.char b))))
        (when (not= top.closer b)
          (parse-error (.. "mismatched closing delimiter " (string.char b)
                           ", expected " (string.char top.closer))))
        (set top.byteend byteindex) ; set closing byte index
        (if (= b 41) (close-list top)
            (= b 93) (close-sequence top)
            (close-curly-table top))))

    (fn parse-string-loop [chars b state]
      (table.insert chars b)
      (let [state (match [state b]
                    [:base 92] :backslash
                    [:base 34] :done
                    _ :base)]
        (if (and b (not= state :done))
            (parse-string-loop chars (getb) state)
            b)))

      (when (not b)
        (when (> (# stack) 0)
          (badend))
        (lua "return nil"))
    (fn parse-string [b]
      (table.insert stack {:closer 34})
      (let [chars [34]
            b (or (parse-string-loop chars (getb) :base) (badend))]
        (table.remove stack)
        (let [raw (string.char (unpack chars))
              formatted (raw:gsub "[\1-\31]" (fn [c] (.. "\\" (c:byte))))
              load-fn ((or _G.loadstring load) (.. "return " formatted))]
          (dispatch (load-fn)))))

      (if (= b 59) ; comment
          (while true
            (set b (getb))
            (when (or (not b) (= b 10))
              (lua "break")))
          (= (type (. delims b)) :number) ; opening delimiter
          (do
            (when (not whitespace-since-dispatch)
              (parse-error (.. "expected whitespace before opening delimiter "
                              (string.char b))))
            (table.insert stack (setmetatable {:bytestart byteindex
                                               :closer (. delims b)
                                               :filename filename
                                               :line line}
                                              (getmetatable (utils.list)))))
          (. delims b) ; closing delimiter
          (let [last (. stack (# stack))]
            (when (= (# stack) 0)
              (parse-error (.. "unexpected closing delimiter " (string.char b))))
            (var val nil)
            (when (not= last.closer b)
              (parse-error (.. "mismatched closing delimiter " (string.char b)
                              ", expected " (string.char last.closer))))
            (set last.byteend byteindex) ; set closing byte index
            (if (= b 41)
                (set val last)
                (= b 93)
                (do
                  (set val (utils.sequence (unpack last)))
                  ;; for table literals we can store file/line/offset source
                  ;; data in fields on the table itself, because the AST node
                  ;; *is* the table, and the fields would show up in the
                  ;; compiled output. keep them on the metatable instead.
                  (each [k v (pairs last)]
                    (tset (getmetatable val) k v)))
                (do
                  (when (not= (% (# last) 2) 0)
                    (set byteindex (- byteindex 1))
                    (parse-error "expected even number of values in table literal"))
                  (set val [])
                  (setmetatable val last) ; see note above about source data
                  (for [i 1 (# last) 2]
                    (when (and (= (tostring (. last i)) ":")
                               (utils.sym? (. last (+ i 1)))
                               (utils.sym? (. last i)))
                      (tset last i (tostring (. last (+ i 1)))))
                    (tset val (. last i) (. last (+ i 1))))))
            (tset stack (# stack) nil)
            (dispatch val))
          (= b 34) ; quoted string
          (let [chars [34]]
            (var state "base")
            (tset stack (+ (# stack) 1) {:closer 34})
            (while true
              (set b (getb))
              (tset chars (+ (# chars) 1) b)
              (if (= state "base")
                  (if (= b 92)
                      (set state "backslash")
                      (= b 34)
                      (set state "done"))
                  (set state "base"))
              (when (or (not b) (= state "done"))
                (lua "break")))
            (when (not b)
              (badend))
            (tset stack (# stack) nil)
            (let [raw (string.char (unpack chars))
                  formatted (raw:gsub "[\1-\31]" (fn [c] (.. "\\" (c:byte))))
                  load-fn ((or _G.loadstring load)
                          (string.format "return %s" formatted))]
              (dispatch (load-fn))))
          (. prefixes b)
          (do ; expand prefix byte into wrapping form eg. '`a' into '(quote a)'
            (table.insert stack {:prefix (. prefixes b)})
            (let [nextb (getb)]
              (when (whitespace? nextb)
                (when (not= b 35)
                  (parse-error "invalid whitespace after quoting prefix"))
                (tset stack (# stack) nil)
                (dispatch (utils.sym "#")))
              (ungetb nextb)))
          (or (symbolchar? b) (= b (string.byte "~"))) ; try sym
          (let [chars []
                bytestart byteindex]
            (while true
              (tset chars (+ (# chars) 1) b)
              (set b (getb))
              (when (or (not b) (not (symbolchar? b)))
                (lua "break")))
            (when b
              (ungetb b))
            (local rawstr (string.char (unpack chars)))
            (if (= rawstr "true")
                (dispatch true)
                (= rawstr "false")
                (dispatch false)
                (= rawstr "...")
                (dispatch (utils.varg))
                (rawstr:match "^:.+$")
                (dispatch (rawstr:sub 2))
                ;; for backwards-compatibility, special-case allowance
                ;; of ~= but all other uses of ~ are disallowed
                (and (rawstr:match "^~") (not= rawstr "~="))
                (parse-error "illegal character: ~")
                (let [force-number (rawstr:match "^%d")
                      number-with-stripped-underscores (rawstr:gsub "_" "")]
                  (var x nil)
                  (if force-number
                      (set x (or (tonumber number-with-stripped-underscores)
                                 (parse-error (.. "could not read number \""
                                                 rawstr "\""))))
                      (do
                        (set x (tonumber number-with-stripped-underscores))
                        (when (not x)
                          (if (rawstr:match "%.[0-9]")
                              (do
                                (set byteindex (+ (+ (- byteindex (# rawstr))
                                                     (rawstr:find "%.[0-9]")) 1))
                                (parse-error (.. "can't start multisym segment "
                                                "with a digit: " rawstr)))
                              (and (rawstr:match "[%.:][%.:]")
                                   (not= rawstr "..")
                                   (not= rawstr "$..."))
                              (do
                                (set byteindex (+ (- byteindex (# rawstr)) 1
                                                  (rawstr:find "[%.:][%.:]")))
                                (parse-error (.. "malformed multisym: " rawstr)))
                              (rawstr:match ":.+[%.:]")
                              (do
                                (set byteindex (+ (- byteindex (# rawstr))
                                                  (rawstr:find ":.+[%.:]")))
                                (parse-error (.. "method must be last component "
                                                "of multisym: " rawstr)))
                              (set x (utils.sym rawstr nil {:byteend byteindex
                                                            :bytestart bytestart
                                                            :filename filename
                                                            :line line}))))))
                  (dispatch x))))
    (fn parse-prefix [b]
      "expand prefix byte into wrapping form eg. '`a' into '(quote a)'"
      (table.insert stack {:prefix (. prefixes b)})
      (let [nextb (getb)]
        (when (whitespace? nextb)
          (when (not= b 35)
            (parse-error "invalid whitespace after quoting prefix"))
          (table.remove stack)
          (dispatch (utils.sym "#")))
        (ungetb nextb)))

    (fn parse-sym-loop [chars b]
      (if (and b (symbolchar? b))
          (do (table.insert chars b)
              (parse-sym-loop chars (getb)))
          (do (when b (ungetb b))
              chars)))

    (fn parse-number [rawstr bytestart]
      (let [force-number (rawstr:match "^%d")
            number-with-stripped-underscores (rawstr:gsub "_" "")]
        (if force-number
            (do (dispatch (or (tonumber number-with-stripped-underscores)
                              (parse-error (.. "could not read number \""
                                               rawstr "\""))))
                true)
            (match (tonumber number-with-stripped-underscores)
              x (do (dispatch x) true)
              _ false))))

    (fn check-malformed-sym [rawstr bytestart]
      ;; for backwards-compatibility, special-case allowance of ~= but
      ;; all other uses of ~ are disallowed
      (if (and (rawstr:match "^~") (not= rawstr "~="))
          (parse-error "illegal character: ~")
          (rawstr:match "%.[0-9]")
          (parse-error (.. "can't start multisym segment "
                           "with a digit: " rawstr)
                       (+ (+ (- byteindex (# rawstr))
                             (rawstr:find "%.[0-9]")) 1))
          (and (rawstr:match "[%.:][%.:]")
               (not= rawstr "..") (not= rawstr "$..."))
          (parse-error (.. "malformed multisym: " rawstr)
                       (+ (- byteindex (# rawstr)) 1
                          (rawstr:find "[%.:][%.:]")))
          (rawstr:match ":.+[%.:]")
          (parse-error (.. "method must be last component "
                           "of multisym: " rawstr)
                       (+ (- byteindex (# rawstr))
                          (rawstr:find ":.+[%.:]")))))

    (fn parse-sym [b] ; not just syms actually...
      (let [bytestart byteindex
            rawstr (string.char (unpack (parse-sym-loop [b] (getb))))]
        (if (= rawstr "true")
            (dispatch true)
            (= rawstr "false")
            (dispatch false)
            (= rawstr "...")
            (dispatch (utils.varg))
            (rawstr:match "^:.+$")
            (dispatch (rawstr:sub 2))
            (parse-number rawstr bytestart) nil
            (check-malformed-sym rawstr bytestart) nil
            (dispatch (utils.sym rawstr nil {:byteend byteindex
                                             :bytestart bytestart
                                             :filename filename
                                             :line line})))))

    (fn parse-loop [b]
      (if (not b) nil
          (= b 59) (skip-comment (getb))
          (= (type (. delims b)) :number) (open-table b)
          (. delims b) (close-table b)
          (= b 34) (parse-string b)
          (. prefixes b) (parse-prefix b)
          (or (symbolchar? b) (= b (string.byte "~"))) (parse-sym b)
          (parse-error (.. "illegal character: " (string.char b))))
      (when done?
        (lua "break")))
    (values true retval))

      (if (not b) nil ; EOF
          done? (values true retval)
          (parse-loop (skip-whitespace (getb)))))

    (parse-loop (skip-whitespace (getb))))
  (values parse-stream (fn [] (set stack []))))

{: granulate : parser : string-stream}

M test/core.fnl => test/core.fnl +1 -0
@@ 49,6 49,7 @@
      (l.assertEquals (fennel.eval code {:correlate true}) expected code))))

(fn test-parsing []
  (set _G.parsedbg true)
  (let [cases {"\"\\\\\"" "\\"
               "\"abc\n\\240\"" "abc\n\240"
               "\"abc\\\"def\"" "abc\"def"