~remexre/nr-langutil

ref: 89672ba5a7d7c4721ac8ee46a597aeeaea98fc81 nr-langutil/parser/runtime.lisp -rw-r--r-- 6.1 KiB
89672ba5Nathan Ringo removes extra dbg 1 year, 3 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
(in-package :nr-langutil)

; TODO: :report
; TODO: More information; current parse stack? expected tokens?
(define-condition parser-error (error)
  ((expected :initarg :expected :reader parser-error/expected :type list)
   (got      :initarg :got      :reader parser-error/got      :type (or keyword null))
   (location :initarg :location :reader location              :type location))
  (:documentation "An error when a sequence of lexemes couldn't be parsed.")
  (:report (lambda (condition stream)
             (with-slots (expected got location) condition
               (pretty-print-location location stream "Got ~s, expected one of ~s" got expected)))))

(declaim (ftype (function (fixnum list) (values list list &optional)) pop-n)
         (inline pop-n))
(defun pop-n (n in)
  "Removes the first N items of IN, returning them in reverse order along with the nthcdr of IN."
  (declare (optimize (debug 0) (safety 0) (speed 3)))
  (let ((popped nil)
        (orig   in))
    (iter
      (for i from 0 below n)
      (declare (type fixnum i))
      (for c = orig)
      (check-type c (cons * list)) ; TODO: Change this to be a type assertion instead?
      (setf orig    (cdr c)
            (cdr c) popped
            popped  c))
    (values popped orig)))

(declaim (ftype (function (list location) (values location &optional)) location-from-list)
         (inline location-from-list))
(defun location-from-list (args lexeme-location)
  (declare (optimize (debug 0) (safety 0) (speed 3)))
  (let ((locations
          (iter
            (for arg in args)
            (when (typep arg 'has-location)
              (collect (location arg))))))
    (if locations
        (apply #'merge-locations locations) 
        lexeme-location)))

(declaim (ftype (function (lexeme
                           (simple-array (or (cons (eql :shift) fixnum)
                                             (cons (eql :reduce) fixnum)
                                             (eql :accept)
                                             (eql nil)) (* *))
                           fixnum
                           hash-table)
                          (values &optional)) signal-parser-error))
(defun signal-parser-error (lexeme action-table state terminals)
  (declare (optimize (debug 0) (safety 0) (speed 3)))
  (error 'parser-error
         :location (location lexeme)
         :got (lexeme/token lexeme)
         :expected (iter
                     (for i from 0 below (hash-table-count terminals))
                     (declare (type fixnum i))
                     (when (aref action-table state i)
                       (collect
                         (iter
                           (for (k v) in-hashtable terminals)
                           (declare (type fixnum v))
                           (when (= v i)
                             (return k))
                           (finally (error "missing terminal"))))))))

(declaim (ftype (function (list
                           (simple-array (or (cons (eql :shift) fixnum)
                                             (cons (eql :reduce) fixnum)
                                             (eql :accept)
                                             (eql nil)) (* *))
                           (simple-array (or fixnum null) (* *))
                           (simple-array (cons fixnum (cons fixnum function)) (*))
                           hash-table
                           fixnum)
                          (values t &optional)) run-lr-parser))
(defun run-lr-parser (lexemes action-table goto-table reduce-actions terminals initial-state)
  "Tries to parse LEXEMES using ACTION-TABLE, GOTO-TABLE, REDUCE-ACTIONS,
   TERMINALS. On a parse failure, signals a PARSER-ERROR.

   LEXEMES is as returned by a lexer generated by DEF/LEXER.
   ACTION-TABLE and GOTO-TABLE are as returned by MAKE-ACTION-GOTO-TABLES.
   TERMINALS are as returned by COLLECT-NAMES.

   REDUCE-ACTIONS is a vector of triples, where the car is the index into the
   GOTO-TABLE corresponding to the nonterminal the reduce action is for, the
   cddr is a function, and the cadr is the number of arguments the function
   takes. (Note that this includes ignored arguments in a production's clause;
   it's \"actually\" the number of items to pop off the parse stack.)"
  (declare (optimize (debug 0) (safety 0) (speed 3)))

  (let ((lexemes lexemes)
        (state-stack (list initial-state)) ; TODO: Might make more sense to keep state-stack as a vector.
        (value-stack nil))
    (declare (type list lexemes state-stack value-stack))

    (iter
      (for lexeme = (car lexemes))
      (for token = (lexeme/token lexeme))
      (for state = (car state-stack))
      (for action = (multiple-value-bind (terminal-idx present) (gethash token terminals)
                      (unless present
                        (signal-parser-error lexeme action-table state terminals))
                      (aref action-table state terminal-idx)))
      (cond
        ((null action)
         (signal-parser-error lexeme action-table state terminals))
        ((eq action :accept)
         (check-type value-stack (cons * null))
         (return-from run-lr-parser (car value-stack)))
        ((eq (car action) :shift)
         (push (cdr action) state-stack)
         (push lexeme value-stack)
         (setf lexemes (cdr lexemes)))
        ((eq (car action) :reduce)
         (let ((reduce-action (aref reduce-actions (cdr action))))
           (declare (type (cons fixnum (cons fixnum function)) reduce-action))
           (let ((goto-idx (car reduce-action))
                 (arg-count (cadr reduce-action))
                 (func (cddr reduce-action)))
             (multiple-value-bind (args new-value-stack) (pop-n arg-count value-stack)
               (setf value-stack new-value-stack)
               (let ((*location* (location-from-list args (location lexeme))))
                 (push (apply func args) value-stack))
               (setf state-stack (nthcdr arg-count state-stack))
               (setf state-stack (cons (aref goto-table (car state-stack) goto-idx) state-stack))))))
        (t
         (error "Invalid ACTION-TABLE: found ~s rather than a valid action." action))))))