about summary refs log tree commit diff
path: root/third_party/lisp/npg/src/define.lisp
blob: 783f071fc5d922c0de4ee170e200c3ac7a26d994 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
;;;  define.lisp --- grammar rules definition

;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: define.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

(in-package :naive-parser-generator)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar *smart-default-reduction* t
  "If true the default reductions take only the non-static tokens -
those that are not declared as strings in the grammar.")

;; These two are filled with DEFRULE.
(defvar *rules* (make-rules-table))
(defvar *keywords* (make-keywords-table))

(defun make-action-arguments (tokens)
  "Given a list of tokens making up a production, return three values:
the list of variables for the function reducing this production, those
that are non static and their unambiguous user-friendly names."
  (flet ((unique (sym list)
           (if (not (assoc sym list))
               sym
               (loop
                  for i of-type fixnum from 2
                  for x = (intern (format nil "~:@(~A~)~A" sym i))
                  while (assoc x list)
                  finally (return x)))))
    (loop
       for tok in tokens
       for i of-type fixnum from 1
       for arg = (intern (format nil "$~A" i) (find-package #.*package*))
       collect arg into args
       unless (const-terminal-p tok)
         collect arg into vars
         and when (symbolp tok)
           collect (list (unique tok named-vars) arg) into named-vars
       when (and (listp tok)
                 (symbolp (cadr tok)))
         collect (list (unique (cadr tok) named-vars) arg) into named-vars
       finally
       (return (values args vars named-vars)))))

(defun make-action-function (name tokens action)
  "Create a function with name NAME, arguments derived from TOKENS and
body ACTION.  Return it's definition."
  (let ((function
         (multiple-value-bind (args vars named-vars)
             (make-action-arguments tokens)
           `(lambda ,args
              (declare (ignorable ,@args))
              (let (($vars (list ,@vars))
                    ($all (list ,@args))
                    ,@named-vars
                    ($alist (list ,@(mapcar #'(lambda (v)
                                                `(cons ',(intern (symbol-name (car v)))
                                                       ,(cadr v)))
                                            named-vars))))
                (declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars)))
                (flet ((make-object (&optional type args)
                         (apply #'make-instance (or type ',name)
                                (append args $alist))))
                  ,action))))))
    (when *compile-print*
      (if *compile-verbose*
          (format t "; Compiling ~S:~%  ~S~%" name function)
          (format t "; Compiling ~S~%" name)))
    (compile name function)))

(defun define-rule (name productions)
  "Accept a rule in EBNF-like syntax, translate it into a sexp and a
call to INSERT-RULE-IN-CURRENT-GRAMMAR."
  (flet ((transform (productions)
           (loop
              for tok in productions
              with prod = nil
              with action = nil
              with phase = nil
              with new-prods = nil
              while tok
              do (cond ((eq tok :=)
                        (push (list (nreverse prod) action) new-prods)
                        (setf prod nil
                              action nil
                              phase :prod))
                       ((eq tok :reduce)
                        (setf phase :action))
                       ((eq tok :tag)
                        (setf phase :tag))
                       ((eq phase :tag)
                        (setf action `(cons ,tok $vars)))
                       ((eq phase :action)
                        (setf action tok))
                       ((eq phase :prod)
                        (push tok prod)))
              finally
                (return (cdr (nreverse (cons (list (nreverse prod) action) new-prods)))))))
    (insert-rule-in-current-grammar name (transform productions))))

(defmacro defrule (name &rest productions)
  "Wrapper macro for DEFINE-RULE."
  `(define-rule ',name ',productions))

(defun make-optional-rule (token)
  "Make a rule for a possibly missing (non)terminal (? syntax) and
return it."
  (insert-rule-in-current-grammar
   (gensym (concatenate 'string "OPT-"
                        (if (rule-p token)
                            (symbol-name (rule-name token))
                            (string-upcase token))))
   `(((,token)) (()))))

(defun make-alternative-rule (tokens)
  "Make a rule for a list of alternatives (\"or\" syntax) and return it."
  (insert-rule-in-current-grammar
   (gensym "ALT")
   (mapcar #'(lambda (alternative)
               `((,alternative)))
           tokens)))

(defun make-nonempty-list-rule (token &optional separator)
  "Make a rule for a non-empty list (+ syntax) and return it."
  (let ((rule-name (gensym (concatenate 'string "NELST-"
                                        (if (rule-p token)
                                            (symbol-name (rule-name token))
                                            (string-upcase token))))))
    (insert-rule-in-current-grammar
     rule-name
     (if separator
         `(((,token ,separator ,rule-name)
            (cons $1 $3))
           ((,token) ,#'list))
         `(((,token ,rule-name)
            (cons $1 $2))
           ((,token) ,#'list))))))

(defun make-list-rule (token &optional separator)
  "Make a rule for a possibly empty list (* syntax) return it."
  (make-optional-rule (make-nonempty-list-rule token separator)))

(defun const-terminal-p (object)
  (or (stringp object)
      (keywordp object)))

(defun expand-production-token (tok)
  "Translate token of the type NAME? or NAME* or NAME+ into (? NAME)
or (* NAME) or (+ NAME).  This is used by the DEFRULE macro."
  (if (symbolp tok)
      (let* ((name (symbol-name tok))
             (last (char name (1- (length name))))
             ;; this looks silly but we need to make sure that we
             ;; return symbols interned in this package, no one else
             (op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *))))))
        (if (and (> (length name) 1) op)
            (list op
                  (intern (subseq name 0 (1- (length name)))))
            tok))
      tok))

(defun EBNF-to-SEBNF (tokens)
  "Take a production as a list of TOKENS and expand it.  This turns a
EBNF syntax into a sexp-based EBNF syntax or SEBNF."
  (loop
     for tok in tokens
     for token = (expand-production-token tok)
     with new-tokens = '()
     do (cond ((member token '(* + ?))
               (setf (car new-tokens)
                     (list token (car new-tokens))))
              (t
               (push token new-tokens)))
     finally (return (nreverse new-tokens))))

(defun SEBNF-to-BNF (tokens)
  "Take a production in SEBNF (Symbolic Extended BNF) syntax and turn
it into BNF.  The production is simplified but the current grammar is
populated with additional rules."
  (flet ((make-complex-token-rule (tok)
           (ecase (car tok)
             (* (apply #'make-list-rule (cdr tok)))
             (+ (apply #'make-nonempty-list-rule (cdr tok)))
             (? (make-optional-rule (cadr tok)))
             (or (make-alternative-rule (cdr tok))))))
    (loop
       for token in tokens
       with new-tokens = '()
       with keywords = '()
       do (cond ((listp token)
                 (push (make-complex-token-rule token) new-tokens))
                (t
                 (push token new-tokens)
                 (when (const-terminal-p token)
                   (push token keywords))))
       finally (return (values (nreverse new-tokens) keywords)))))

(defun make-default-action-function (name tokens)
  "Create a sexp to be used as default action in case one is not
supplied in the production.  This is usually a quite sensible
one.  That is, only the non-constant tokens are returned in a
list and in case only a variable token is available that one is
returned (not included in a list).  If all the tokens are
constant, then all of them are returned in a list."
  (cond ((null tokens)
         ;; if the production matched the empty list (no tokens) we
         ;; return always nil, that is the function LIST applied to no
         ;; arguments
         #'list)
        ((null (cdr tokens))
         ;; if the production matches just one token we simply return
         ;; that
         #'identity)
        (*smart-default-reduction*
         ;; If we are required to be "smart" then create a function
         ;; that simply returns the non static tokens of the
         ;; production.  If the production doesn't have nonterminal,
         ;; then return all the tokens.  If the production has only
         ;; one argument then return that one only.
         (make-action-function name tokens '(cond
                                             ((null $vars) $all)
                                             ((null (cdr $vars)) (car $vars))
                                             (t $vars))))
        (t
         ;; in all the other cases we return all the token matching
         ;; the production
         #'list)))

(defun make-production-from-descr (name production-description)
  "Take a production NAME and its description in the form of a sexp
and return a production structure object together with a list of used
keywords."
  (destructuring-bind (tokens &optional action) production-description
    (let ((expanded-tokens (EBNF-to-SEBNF tokens)))
      (multiple-value-bind (production-tokens keywords)
          (sebnf-to-bnf expanded-tokens)
      (let ((funct
             (cond ((not action)
                    (make-default-action-function name expanded-tokens))
                   ((or (listp action)
                        ;; the case when the action is simply to
                        ;; return a token (ie $2) or a constant value
                        (symbolp action))
                    (make-action-function name expanded-tokens action))
                   ((functionp action)
                    action)
                   (t			; action is a constant
                    #'(lambda (&rest args)
                        (declare (ignore args))
                        action)))))
        (values
         ;; Make a promise instead of actually resolving the
         ;; nonterminals.  This avoids endless recursion.
         (make-production :tokens production-tokens
                          :tokens-length (length production-tokens)
                          :action funct)
         keywords))))))

(defun remove-immediate-left-recursivity (rule)
  "Turn left recursive rules of the type
    A -> A x | y
into
    A -> y A2
    A2 -> x A2 | E
where E is the empty production."
  (let ((name (rule-name rule))
        (productions (rule-productions rule)))
    (loop
       for prod in productions
       for tokens = (prod-tokens prod)
       ;; when immediately left recursive
       when (eq (car tokens) rule)
       collect prod into left-recursive
       else
       collect prod into non-left-recursive
       finally
         ;; found any left recursive production?
         (when left-recursive
           (warn "rule ~S is left recursive" name)
           (let ((new-rule (make-rule :name (gensym "REWRITE"))))
             ;; A -> y A2
             (setf (rule-productions rule)
                   (mapcar #'(lambda (p)
                               (let ((tokens (prod-tokens p))
                                     (action (prod-action p)))
                                 (make-production :tokens (append tokens (list new-rule))
                                                  :tokens-length (1+ (prod-tokens-length p))
                                                  :action #'(lambda (&rest args)
                                                              (let ((f-A2 (car (last args)))
                                                                    (head (butlast args)))
                                                                (funcall f-A2 (apply action head)))))))
                           non-left-recursive))
             ;; A2 -> x A2 | E
             (setf (rule-productions new-rule)
                   (append
                    (mapcar #'(lambda (p)
                                (let ((tokens (prod-tokens p))
                                      (action (prod-action p)))
                                  (make-production :tokens (append (cdr tokens) (list new-rule))
                                                   :tokens-length (prod-tokens-length p)
                                                   :action #'(lambda (&rest args)
                                                               (let ((f-A2 (car (last args)))
                                                                     (head (butlast args)))
                                                                 #'(lambda (x)
                                                                     (funcall f-A2 (apply action x head))))))))
                            left-recursive)
                    (list
                     (make-production :tokens nil
                                      :tokens-length 0
                                      :action #'(lambda () #'(lambda (arg) arg)))))))))))

(defun remove-left-recursivity-from-rules (rules)
  (loop
     for rule being each hash-value in rules
     do
     ;; More to be done here.  For now only the trivial immediate left
     ;; recursivity is removed -wcp18/11/03.
       (remove-immediate-left-recursivity rule)))

(defun resolve-all-nonterminals (rules)
  (loop
     for rule being each hash-value in rules
     do (loop
           for production in (rule-productions rule)
           do (setf (prod-tokens production)
                    (resolve-nonterminals (prod-tokens production) rules)))))

(defun make-rule-productions (rule-name production-descriptions)
  "Return a production object that belongs to RULE-NAME made according
to PRODUCTION-DESCRIPTIONS.  See also MAKE-PRODUCTION-FROM-DESCR."
  (loop
     for descr in production-descriptions
     for i of-type fixnum from 1 by 1
     for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i))
     with productions = '()
     with keywords = '()
     do (progn
          (multiple-value-bind (production keyws)
              (make-production-from-descr prod-name descr)
            (push production productions)
            (setf keywords (append keyws keywords))))
     finally (return
               (values (nreverse productions) keywords))))

(defun create-rule (name production-descriptions)
  "Return a new rule object together with a list of keywords making up
the production definitions."
  (multiple-value-bind (productions keywords)
      (make-rule-productions name production-descriptions)
    (values (make-rule :name name :productions productions)
            keywords)))

(defun insert-rule-in-current-grammar (name productions)
  "Add rule to the current grammar and its keywords to the keywords
hash table.  You don't want to use this directly.  See DEFRULE macro
instead."
  (when (find-rule name *rules*)
    (error "redefining rule ~A" name))
  (multiple-value-bind (rule keywords)
      (create-rule name productions)
    (add-rule name rule *rules*)
    (dolist (term keywords)
      (add-keyword term *keywords*))
    rule))

(defun resolve-nonterminals (tokens rules)
  "Given a list of production tokens, try to expand the nonterminal
ones with their respective rule from the the RULES pool."
  (flet ((resolve-symbol (sym)
           (or (find-rule sym rules)
               sym)))
    (mapcar #'(lambda (tok)
                (if (symbolp tok)
                    (resolve-symbol tok)
                    tok))
            tokens)))

(defun reset-grammar ()
  "Empty the current grammar from any existing rule."
  (setf *rules* (make-rules-table)
        *keywords* (make-keywords-table)))

(defun generate-grammar (&optional (equal-p #'string-equal))
  "Return a GRAMMAR structure suitable for the PARSE function, using
the current rules.  EQUAL-P, if present, is a function to be used to
match the input tokens; it defaults to STRING-EQUAL."
  (resolve-all-nonterminals *rules*)
  (remove-left-recursivity-from-rules *rules*)
  (make-grammar :rules *rules*
                :keywords *keywords*
                :equal-p equal-p))