8c92d5466fefcf4a2774ef8b386f0947e2c86b2d — Brit Butler 1 year, 1 month ago 7178a2f
Add a basic parser.
4 files changed, 124 insertions(+), 3 deletions(-)

M rascal.asd
M src/docs.lisp
A src/parse.lisp
A src/protocol.lisp
M rascal.asd => rascal.asd +5 -2
@@ 13,14 13,17 @@
  :build-operation "program-op"
  :build-pathname "bin/rascal"
  :entry-point "rascal:main"
  :components ((:module "src"
                :serial t
                ((:file "debug")
                ((:file "protocol")
                 (:file "parse")
                 (:file "debug")
                 (:file "shell")
                 (:file "docs")
                 (:file "rascal"))))

M src/docs.lisp => src/docs.lisp +3 -1
@@ 8,7 8,9 @@
;;; Top-level documentation section for rascal. Every entry pulls in another
;;; section; the protocol and parse sections belong to their own packages
;;; and are therefore referenced with package-qualified names.
(defsection @rascal (:title "rascal")
  (@links section)
  (@overview section)
  (rascal.shell:@command section)
  (rascal.protocol:@protocol section)
  (rascal.parse:@parse section))

(defsection @links (:title "Links")
  "[repo]: https://git.sr.ht/~kingcons/rascal

A src/parse.lisp => src/parse.lisp +76 -0
@@ 0,0 1,76 @@
(mgl-pax:define-package :rascal.parse
  (:use :cl :alexandria :mgl-pax :esrap)
  (:import-from :serapeum #:~>>))

(in-package :rascal.parse)

;;; Documentation section for the parser package. The entries after the
;;; prose list the AST node classes and the accessors that this section
;;; documents.
;;; NOTE(review): the prose below mentions PARSE-PROGRAM implementations for
;;; pathnames and strings, but no such DEFMETHOD is visible in this part of
;;; the file -- confirm whether they exist elsewhere or are still to come.
(defsection @parse (:title "Parsing Tools")
  "[esrap]: https://scymtym.github.io/esrap/

Before we can begin the business of compiling programs, we have
to be able to parse them. Rascal's parser is based on the powerful
[esrap][esrap] PEG parsing library. Most importantly, it provides a
concrete implementation of the PARSE-PROGRAM generic-function defined
in `rascal.protocol` for pathnames and strings. Various types of AST
nodes are also provided."

  (primitive class)
  (operator-of (accessor primitive))
  (args-of (accessor primitive))
  (num class)
  (value-of (accessor num)))

(defclass primitive ()
  ((operator
    :type symbol
    :accessor operator-of
    :initarg :operator
    :documentation "Symbol naming the primitive operation.")
   (args
    :type list
    :accessor args-of
    :initarg :args
    :documentation "List of the argument nodes given to the operator."))
  (:documentation
   "AST node for a primitive operation applied to a list of arguments."))

(defmethod print-object ((node primitive) stream)
  "Print NODE unreadably: its type name followed by its operator symbol."
  (print-unreadable-object (node stream :type t)
    ;; PRIN1 is what the ~S directive performs.
    (prin1 (operator-of node) stream)))

(defclass num ()
  ((value
    :type fixnum
    :accessor value-of
    :initarg :value
    :documentation "The integer this node stands for."))
  (:documentation "AST node holding an integer literal."))

(defmethod print-object ((num num) stream)
  "Print NUM unreadably: its type name followed by its value in decimal."
  (print-unreadable-object (num stream :type t)
    (with-accessors ((value value-of)) num
      (format stream "~D" value))))

(defun maybe-strip-whitespace (item)
  "Return ITEM with every :WS marker removed when ITEM is a cons.
Any other object (a symbol, NIL, ...) is returned unchanged."
  (if (consp item)
      (remove :ws item)
      ;; Non-list parse results carry no whitespace markers.
      item))

;; A single ASCII letter, lower or upper case.
(defrule letter
    (character-ranges (#\a #\z) (#\A #\Z)))

;; A single decimal digit character, #\0 through #\9.
(defrule digit (character-ranges (#\0 #\9)))

;; One or more digits, turned into a NUM node holding the parsed integer.
(defrule digits
    (+ digit)
  (:text t)
  (:lambda (text)
    (let ((value (parse-integer text)))
      (make-instance 'num :value value))))

;; A single space, tab or newline. The production is always the marker :WS.
(defrule whitespace
    (character-ranges #\Space #\Tab #\Newline)
  (:constant :ws))

;; The name of a primitive operator, produced as an upcased symbol.
(defrule prim-op
    (or "read" "+" "-")
  (:lambda (op)
    ;; Intern into a fixed package instead of whatever *PACKAGE* happens to
    ;; be bound to when the parser runs, so the resulting operator symbol
    ;; does not depend on the caller's current package.
    (intern (string-upcase op) :rascal.parse)))

;; A parenthesized primitive application: an operator followed by any mix
;; of whitespace, numbers and nested forms. Produces a PRIMITIVE node.
(defrule form
    (and "(" prim-op (* (or whitespace digits form)) ")")
  (:lambda (match)
    ;; Drop the opening and closing parenthesis, which leaves the operator
    ;; and the list of children; then strip whitespace markers from each.
    (destructuring-bind (operator arguments)
        (mapcar #'maybe-strip-whitespace (butlast (rest match)))
      (make-instance 'primitive
                     :operator operator
                     :args arguments))))

;; (defrule identifier
;;     (+ (or letter digit #\- #\! #\?))
;;   (:text t))

A src/protocol.lisp => src/protocol.lisp +40 -0
@@ 0,0 1,40 @@
(mgl-pax:define-package :rascal.protocol
  (:use :cl :mgl-pax))

(in-package :rascal.protocol)

;;; Documentation section for the compiler protocol. The entries after the
;;; prose list the variable, class, accessors and generic functions that
;;; this section documents.
(defsection @protocol (:title "The Compiler Protocol")
  "For purposes of testing and organizing the various iterations of
compiler passes that make up rascal, we will have some common structure here.

First and foremost, there is a PROGRAM class that we will use to represent
parsed source fragments. Additionally, the *LANGUAGES* variable will keep
track of the subsets currently supported by the rascal suite. We also provide
a method LINT to validate a source program according to the language grammar
and a method INTERPRET to execute the program via a simple interpreter."
  (*languages* variable)
  (program class)
  (metadata (accessor program))
  (body (accessor program))
  (parse-program generic-function)
  (lint generic-function)
  (interpret generic-function))

;; DEFVAR takes the initial value before the documentation string, so the
;; empty list has to be given explicitly; with a lone string argument the
;; string itself would become the variable's value.
(defvar *languages* '()
  "A list of symbols naming all language subsets that rascal supports.")

(defclass program ()
  ((metadata
    :type list
    :accessor metadata
    :initarg :metadata
    :documentation "Any metadata available for the program, as a list.")
   (body
    :type list
    :accessor body
    :initarg :body
    :documentation "The list of AST nodes making up the program."))
  (:documentation "A parsed source fragment: metadata plus a body of AST nodes."))

(defgeneric parse-program
    (source)
  (:documentation
   "Turn SOURCE into a PROGRAM whose body is a list of AST nodes and whose
metadata holds whatever metadata is available."))

(defgeneric lint
    (program language)
  (:documentation
   "Check that PROGRAM is valid under the rules of LANGUAGE."))

(defgeneric interpret
    (program language)
  (:documentation
   "Execute PROGRAM, treating it as LANGUAGE, and return the result."))