Projects : gscm : gscm_usrbin
1 | ;;;; Front-end compiler for the Gales Scheme interpreter |
2 | ;;; Jacob Welsh, Jan 2017 |
3 | ;;; |
4 | ;;; This "compiler" performs structural checks on the various expression types so the interpreter can avoid doing them repeatedly, copies to prevent mutation by user code after checking, replaces symbols with container objects for memoizing variable references, and implements a simplified (non-hygienic) macro language and some special cases for translating fancier expression types to primitive forms. |
5 | ;;; |
6 | ;;; To avoid the chicken-and-egg bootstrapping problem, the code is restricted to the forms supported directly by the interpreter (LAMBDA, QUOTE, IF, SET!, BEGIN, LETREC, DELAY). It should be scrutinized carefully: any malformed expressions in its code may invoke the dreaded Undefined Behavior in the interpreter, and any incorrect output opens the interpreter to UB from user code. |
7 | ;;; |
8 | ;;; For ease of loading from C, the compiler is structured as a single expression evaluating to a procedure. This is then registered as the global compiler procedure and is called on all further expressions passed to EVAL. |
9 | ;;; |
10 | ;;; The compiler's environment must not be mutable from user code, and some extensions are required. |
11 | ;;; |
12 | ;;; See also test-compiler.scm. |
13 | ;;; |
14 | ;;; BUGS: The macro language is fairly stupid -- I've got some reading to do on the subject. |
15 | |
16 | (letrec |
17 | ((err-context #f) |
18 | (cerror |
19 | (lambda (msg . detail) |
20 | (apply/unchecked |
21 | error (cons (string-append (symbol->string err-context) ": " msg) |
22 | detail)))) |
23 | |
24 | (syn-env '()) |
25 | (builtin-syn-env #f) |
26 | |
27 | (vector-set/unchecked! vector-set!) |
28 | |
29 | (unspecified-value '()) |
30 | |
31 | (list (lambda args args)) |
32 | (map |
33 | (lambda (proc l) |
34 | (if (null? l) '() |
35 | (cons (proc (car l)) (map proc (cdr l)))))) |
36 | (for-each |
37 | (lambda (proc l) |
38 | (letrec ((loop (lambda (l) (if (null? l) l (step l)))) |
39 | (step (lambda (l) (proc (car l)) (loop (cdr l))))) |
40 | (loop l)))) |
41 | (all |
42 | (lambda (pred items) |
43 | (if (null? items) #t |
44 | (if (pred (car items)) (all pred (cdr items)) |
45 | #f)))) |
46 | (compose (lambda (f g) (lambda (x) (f (g x))))) |
47 | (vmap |
48 | (lambda (proc vec) |
49 | (letrec ((result (make-vector (vector-length vec))) |
50 | (loop (lambda (k) (if (fx=/unchecked k -1) result (step k)))) |
51 | (step (lambda (k) |
52 | (vector-set/unchecked! |
53 | result k (proc (vector-ref/unchecked vec k))) |
54 | (loop (fx-/unchecked k 1))))) |
55 | (loop (fx-/unchecked (vector-length vec) 1))))) |
56 | (deep-copy/immutable |
57 | (lambda (x) |
58 | (if (pair? x) (cons/immutable (deep-copy/immutable (car/unchecked x)) |
59 | (deep-copy/immutable (cdr/unchecked x))) |
60 | (if (vector? x) (vector-copy/immutable (vmap deep-copy/immutable x)) |
61 | (if (string? x) (string-copy/immutable x) |
62 | x))))) |
63 | (deep-copy #f) ;; alias binding |
64 | (memq |
65 | (lambda (x l) |
66 | (if (pair? l) (if (eq? x (car/unchecked l)) l |
67 | (memq x (cdr/unchecked l))) |
68 | #f))) |
69 | (not-list-of-length |
70 | (lambda (obj . lengths) |
71 | (if (list? obj) |
72 | (if (memq (length obj) lengths) #f #t) |
73 | #t))) |
74 | (assq |
75 | (lambda (x l) |
76 | (if (pair? l) (if (eq? x (caar l)) (car/unchecked l) |
77 | (assq x (cdr/unchecked l))) |
78 | #f))) |
79 | (append2 |
80 | (lambda (x y) |
81 | (if (pair? x) (cons (car/unchecked x) |
82 | (append2 (cdr/unchecked x) y)) |
83 | (if (null? x) y |
84 | (error |
85 | "COMPILER BUG: append2: non-final argument not a list"))))) |
86 | (append+/unchecked |
87 | (lambda (lists) |
88 | (if (pair? (cdr/unchecked lists)) |
89 | (append2 (car/unchecked lists) |
90 | (append+/unchecked (cdr/unchecked lists))) |
91 | (car/unchecked lists)))) |
92 | (append* |
93 | (lambda (lists) |
94 | (if (pair? lists) (append+/unchecked lists) |
95 | lists))) |
96 | (list-head |
97 | (lambda (l k) |
98 | (if (fx=/unchecked k 0) '() |
99 | (cons (car l) (list-head (cdr l) (fx-/unchecked k 1)))))) |
100 | (list-tail |
101 | (lambda (l k) |
102 | (if (fx=/unchecked k 0) l |
103 | (list-tail (cdr l) (fx-/unchecked k 1))))) |
104 | |
105 | (dict-extend (lambda (d name value) (cons (cons name value) d))) |
106 | (dict-get (lambda (d name default) |
107 | ((lambda (binding) (if binding (cdr binding) default)) |
108 | (assq name d)))) |
109 | |
110 | (add-syntax! |
111 | (lambda (name rules) |
112 | ((lambda (binding) |
113 | (if binding (set-cdr! binding rules) |
114 | (set! syn-env (dict-extend syn-env name rules)))) |
115 | (assq name syn-env)))) |
116 | |
117 | (with-split-list |
118 | (lambda (l k proc) |
119 | (proc (list-head l k) (list-tail l k)))) |
120 | |
121 | (check-ident |
122 | (lambda (obj) (if (symbol? obj) obj (cerror "not an identifier:" obj)))) |
123 | |
124 | ;; Check that a parameter list contains only symbols, no duplicates, and won't overflow internal limits (arity annotation in procedures and frame index in variable refs). Return the number of parameters, negated for an improper list. |
125 | (check-param-list |
126 | (lambda (vars) |
127 | (letrec |
128 | ((len 0) |
129 | (improper? #f) |
130 | (make-proper |
131 | ;; Unsurprisingly I hope, if you build an expression tree with a cyclic parameter list and eval it, you'll exhaust the heap. |
132 | (lambda (l) |
133 | (if (null? l) '() |
134 | (begin (if (fx=/unchecked len *max-parameters*) |
135 | (cerror "too many parameters")) |
136 | (set! len (fx+/unchecked len 1)) |
137 | (if (pair? l) |
138 | (cons (car/unchecked l) |
139 | (make-proper (cdr/unchecked l))) |
140 | (begin (set! improper? #t) |
141 | (list l))))))) |
142 | ;; Mergesort based duplicate search |
143 | (sort |
144 | (lambda (l) |
145 | (if (null? l) l |
146 | (if (null? (cdr/unchecked l)) l |
147 | (with-split-list |
148 | l (fxshift (length l) -1) |
149 | (lambda (left right) |
150 | (merge (sort left) (sort right)))))))) |
151 | (merge |
152 | (lambda (left right) |
153 | (if (null? left) right |
154 | (if (null? right) left |
155 | (if (string<? (car/unchecked left) (car/unchecked right)) |
156 | (cons (car/unchecked left) |
157 | (merge (cdr/unchecked left) right)) |
158 | (if (string<? (car/unchecked right) |
159 | (car/unchecked left)) |
160 | (cons (car/unchecked right) |
161 | (merge left (cdr/unchecked right))) |
162 | (cerror "duplicate name:" |
163 | (car/unchecked left))))))))) |
164 | (sort (map (compose symbol->string check-ident) (make-proper vars))) |
165 | (if improper? (fxneg/unchecked len) len)))) |
166 | (max-args |
167 | (lambda (form n) |
168 | (if (fx</unchecked n (length form)) (cerror "too many arguments")))) |
169 | |
170 | (compile |
171 | (lambda (expr allow-defs) |
172 | (set! err-context 'compile) |
173 | (if (pair? expr) |
174 | (if (list? expr) |
175 | (compile-combination (car/unchecked expr) (cdr/unchecked expr) |
176 | allow-defs) |
177 | (cerror "combination not a proper list")) |
178 | (if (symbol? expr) (variable-ref expr) |
179 | (if (string? expr) (string-copy/immutable expr) |
180 | (if (number? expr) expr |
181 | (if (boolean? expr) expr |
182 | (if (char? expr) expr |
183 | (if (null? expr) (cerror "empty combination") |
184 | (cerror "bad type (literal constant missing quote?):" |
185 | expr)))))))))) |
186 | (compile-expr-or-def (lambda (expr) (compile expr #t))) |
187 | (compile-expr (lambda (expr) (compile expr #f))) |
188 | (compile-combination |
189 | (lambda (operator operands allow-defs) |
190 | (if (symbol? operator) |
191 | ;; FIXME macro/variable shadowing |
192 | ((lambda (rules) ;; macro? |
193 | (if rules (compile-macro rules operator operands allow-defs) |
194 | ((lambda (r) ;; definition? |
195 | (if r r |
196 | ((lambda (r) ;; builtin form? |
197 | (if r r |
198 | ;; procedure call |
199 | (map compile-expr (cons operator operands)))) |
200 | (compile-builtin |
201 | (dict-get builtin-syn-env operator #f) |
202 | operator operands)))) |
203 | (if allow-defs (compile-definition operator operands) #f)))) |
204 | (dict-get syn-env operator #f)) |
205 | (if (pair? operator) |
206 | ;; procedure call with compound operator |
207 | (map compile-expr (cons operator operands)) |
208 | (cerror "bad operator type:" operator))))) |
209 | (compile-macro |
210 | (lambda (syn-rules keyword form allow-defs) |
211 | (set! err-context keyword) |
212 | (compile (transform form (car syn-rules) (cdr syn-rules)) allow-defs))) |
213 | (compile-definition |
214 | (lambda (keyword operands) |
215 | (if (eq? keyword 'define) (compile-define operands) |
216 | (if (eq? keyword 'define-syntax) (compile-define-syntax operands) |
217 | (if (eq? keyword 'begin) |
218 | (if (null? operands) |
219 | unspecified-value ; allow empty begin at top level |
220 | (cons 'begin (map compile-expr-or-def operands))) |
221 | #f))))) |
222 | (compile-builtin |
223 | (lambda (compiler-proc keyword operands) |
224 | (if compiler-proc |
225 | (if (procedure? compiler-proc) |
226 | (begin (set! err-context keyword) |
227 | (compiler-proc operands)) |
228 | ;; #t: builtin syntax with nothing to check |
229 | (cons keyword (map compile-expr operands))) |
230 | #f))) |
231 | (compile-body |
232 | (lambda (body) |
233 | ;; FIXME allow macros to expand to internal definitions |
234 | (letrec |
235 | ((definition? |
236 | (lambda (stmt) (if (pair? stmt) (eq? (car stmt) 'define) #f))) |
237 | (begin? |
238 | (lambda (stmt) (if (pair? stmt) (eq? (car stmt) 'begin) #f))) |
239 | (definitions? |
240 | (lambda (stmt) |
241 | (if (pair? stmt) |
242 | (if (eq? (car stmt) 'define) #t |
243 | (if (eq? (car stmt) 'begin) (all definitions? (cdr stmt)) |
244 | #f)) |
245 | #f))) |
246 | (flatten-defs |
247 | (lambda (stmt) |
248 | (if (begin? stmt) (flatten-defs (cdr stmt)) |
249 | (if (definition? stmt) (list stmt) |
250 | (append* (map flatten-defs stmt)))))) |
251 | (find-internal-defs |
252 | (lambda (defs body) |
253 | (if (if (pair? body) (definitions? (car body)) #f) |
254 | (find-internal-defs |
255 | (cons (car body) defs) (cdr body)) |
256 | (with-compiled-defs |
257 | (map (compose compile-define cdr) (flatten-defs defs)) |
258 | body)))) |
259 | (saved-context err-context) |
260 | (with-compiled-defs |
261 | (lambda (defs body) |
262 | (set! err-context saved-context) ; restore after compile-define |
263 | (if (null? body) |
264 | (if (null? defs) |
265 | (cerror "empty body") |
266 | (cerror |
267 | "missing expression after internal definitions"))) |
268 | (if (null? defs) |
269 | (map compile-expr body) |
270 | (begin (check-param-list (map cadr defs)) |
271 | (list (cons 'letrec |
272 | (cons (map cdr defs) |
273 | (map compile-expr body))))))))) |
274 | (find-internal-defs '() body)))) |
275 | (compile-lambda |
276 | (lambda (operands) |
277 | (if (null? operands) (cerror "missing parameters")) |
278 | ((lambda (arity params-copy) |
279 | (cons 'lambda |
280 | ;; annotate to save runtime traversals, see procedure() |
281 | (cons arity |
282 | (cons params-copy |
283 | (compile-body (cdr operands)))))) |
284 | (check-param-list (car operands)) |
285 | (deep-copy (car operands))))) |
286 | (compile-quote |
287 | (lambda (operands) |
288 | (if (null? operands) (cerror "missing argument")) |
289 | (max-args operands 1) |
290 | ((lambda (v) |
291 | (if (symbol? v) (list 'quote v) |
292 | (if (pair? v) (list 'quote (deep-copy/immutable v)) |
293 | ;; Save some branching for self-evaluating constants. Unquoted () or vectors aren't technically valid Scheme, but we've already caught this and permitting internally is harmless. |
294 | (if (null? v) v |
295 | (if (vector? v) (deep-copy/immutable v) |
296 | (if (number? v) v |
297 | (if (boolean? v) v |
298 | (if (char? v) v |
299 | (if (string? v) (string-copy/immutable v) |
300 | ;; EVAL shenanigans on non-readable objects |
301 | (cerror |
302 | "object not valid in program text:" |
303 | v)))))))))) |
304 | (car operands)))) |
305 | (compile-if |
306 | (lambda (operands) |
307 | (if (null? operands) (cerror "missing predicate")) |
308 | (if (null? (cdr operands)) (cerror "missing consequent")) |
309 | (max-args operands 3) |
310 | (cons 'if (map compile-expr operands)))) |
311 | (compile-set |
312 | (lambda (operands) |
313 | (if (null? operands) (cerror "missing name")) |
314 | (if (null? (cdr operands)) (cerror "missing value")) |
315 | (max-args operands 2) |
316 | (list 'set! (variable-ref (check-ident (car operands))) |
317 | (compile-expr (cadr operands))))) |
318 | (compile-begin |
319 | (lambda (operands) |
320 | (if (null? operands) (cerror "missing expression")) |
321 | (cons 'begin (map compile-expr operands)))) |
322 | (check-bindings |
323 | (lambda (bindings) |
324 | (if (not (list? bindings)) (cerror "bindings not a list:" bindings)) |
325 | (for-each (lambda (binding) |
326 | (if (not-list-of-length binding 2) |
327 | (cerror "ill-formed binding:" binding))) |
328 | bindings))) |
329 | (compile-let |
330 | (lambda (operands) |
331 | (if (null? operands) (cerror "missing bindings")) |
332 | (if (symbol? (car operands)) |
333 | (if (null? (cdr operands)) (cerror "missing bindings") |
334 | (compile-named-let |
335 | (car operands) (cadr operands) (cddr operands))) |
336 | (compile-plain-let (car operands) (cdr operands))))) |
337 | (compile-plain-let |
338 | (lambda (bindings body) |
339 | (check-bindings bindings) |
340 | (compile-expr |
341 | (cons (cons 'lambda (cons (map car bindings) body)) |
342 | (map cadr bindings))))) |
343 | (compile-named-let |
344 | (lambda (name bindings body) |
345 | (check-bindings bindings) |
346 | (compile-expr |
347 | (list 'letrec |
348 | (list (list name (cons 'lambda (cons (map car bindings) body)))) |
349 | (cons name (map cadr bindings)))))) |
350 | (compile-do |
351 | (lambda (operands) |
352 | (if (null? operands) (cerror "missing iteration specs")) |
353 | (if (null? (cdr operands)) (cerror "missing test clause")) |
354 | (letrec |
355 | ((specs (car operands)) |
356 | (test-clause (cadr operands)) |
357 | (commands (cddr operands)) |
358 | (loop-name 'tmp-123f7b03f3810f4a) |
359 | (step-expr |
360 | (lambda (spec) |
361 | (if (null? (cddr spec)) |
362 | (car spec) ; If <step> omitted, use <variable> |
363 | (caddr spec)))) |
364 | (make-loop-proc |
365 | (lambda () |
366 | (list |
367 | 'lambda (map car specs) |
368 | (list 'if (car test-clause) |
369 | ;; If <test> is true, return result of <expressions> |
370 | (if (null? (cdr test-clause)) |
371 | (list 'quote unspecified-value) |
372 | (cons 'begin (cdr test-clause))) |
373 | ;; Otherwise execute <commands> and loop |
374 | (cons 'begin (append2 |
375 | commands |
376 | (list (cons loop-name |
377 | (map step-expr specs)))))))))) |
378 | (if (not (list? specs)) |
379 | (cerror "iteration specs not a list:" specs)) |
380 | (for-each (lambda (spec) |
381 | (if (not-list-of-length spec 2 3) |
382 | (cerror "ill-formed iteration spec:" spec))) |
383 | specs) |
384 | (if (not (list? test-clause)) |
385 | (cerror "test clause not a list:" test-clause)) |
386 | (if (null? test-clause) (cerror "missing test expression")) |
387 | (compile-expr |
388 | (list 'letrec |
389 | (list (list loop-name (make-loop-proc))) |
390 | (cons loop-name (map cadr specs))))))) |
391 | |
392 | (compile-letrec |
393 | (lambda (operands) |
394 | ;; TODO annotate arity as with lambda (watch out for the other manual letrec constructions) |
395 | (if (null? operands) (cerror "missing bindings")) |
396 | (check-bindings (car operands)) |
397 | (check-param-list (map car (car operands))) |
398 | (cons 'letrec |
399 | (cons (map (lambda (binding) |
400 | (list (car binding) (compile-expr (cadr binding)))) |
401 | (car operands)) |
402 | (compile-body (cdr operands)))))) |
403 | (compile-define |
404 | (lambda (operands) |
405 | (set! err-context 'define) |
406 | (if (null? operands) (cerror "missing name")) |
407 | (if (pair? (car operands)) |
408 | ;; (define (proc . params) . body) |
409 | ;; -> (define proc (lambda params . body)) |
410 | (list 'define |
411 | (check-ident (caar operands)) |
412 | (compile-lambda (cons (cdar operands) (cdr operands)))) |
413 | ;; (define var value) |
414 | (begin (if (null? (cdr operands)) (cerror "missing value")) |
415 | (max-args operands 2) |
416 | (list 'define |
417 | (check-ident (car operands)) |
418 | (compile-expr (cadr operands))))))) |
419 | (compile-bad-define |
420 | (lambda (operands) |
421 | (cerror "only allowed at start of body or top level:" |
422 | (cons 'define operands)))) |
423 | (compile-bad-define-syntax |
424 | (lambda (operands) |
425 | (cerror "only allowed at top level:" (cons 'define-syntax operands)))) |
426 | (compile-delay |
427 | (lambda (operands) |
428 | (if (null? operands) (cerror "missing expression")) |
429 | (max-args operands 1) |
430 | (list 'delay (compile-expr (car operands))))) |
431 | (compile-define-syntax |
432 | (lambda (operands) |
433 | (set! err-context 'define-syntax) |
434 | (if (null? operands) (cerror "missing name")) |
435 | (if (null? (cdr operands)) (cerror "missing syntax-rules")) |
436 | (max-args operands 2) |
437 | (if (not (list? (cadr operands))) |
438 | (cerror "not a combination (expected syntax-rules):" |
439 | (cadr operands))) |
440 | (if (null? (cadr operands)) |
441 | (cerror "empty combination (expected syntax-rules)")) |
442 | (if (not (eq? (caadr operands) 'syntax-rules)) |
443 | (cerror "expected syntax-rules:" (cadr operands))) |
444 | (add-syntax! (check-ident (car operands)) |
445 | (compile-syntax-rules (car operands) (cdadr operands))) |
446 | unspecified-value)) |
447 | (compile-syntax-rules |
448 | (lambda (keyword operands) |
449 | (set! err-context 'syntax-rules) |
450 | (if (null? operands) (cerror "missing literals")) |
451 | (if (not (list? (car operands))) (cerror "literals not a list")) |
452 | (check-param-list (car operands)) |
453 | (cons (deep-copy (car operands)) |
454 | (map (compile-syntax-rule keyword (car operands)) |
455 | (cdr operands))))) |
456 | (compile-syntax-rule |
457 | (lambda (keyword literals) |
458 | (lambda (rule) |
459 | (if (not-list-of-length rule 2) (cerror "ill-formed rule")) |
460 | (list (compile-pattern keyword literals (car rule)) |
461 | (deep-copy (cadr rule)))))) |
462 | (compile-pattern |
463 | (lambda (keyword literals pat) |
464 | (letrec |
465 | ((get-pat-vars |
466 | ;; Walk a pattern, extending a list of pattern variables |
467 | (lambda (vars pat) |
468 | (if (pair? pat) (get-pat-vars (get-pat-vars vars (car pat)) |
469 | (cdr pat)) |
470 | (if (symbol? pat) |
471 | (if (eq? pat '...) (cerror "ellipsis unsupported") |
472 | (if (memq pat literals) vars (cons pat vars))) |
473 | vars))))) |
474 | (if (null? pat) (cerror "empty pattern")) |
475 | (if (not (pair? pat)) (cerror "pattern not a list:" pat)) |
476 | (if (not (eq? (car pat) keyword)) |
477 | (cerror "pattern doesn't begin with keyword:" pat)) |
478 | (check-param-list (get-pat-vars '() pat)) |
479 | (deep-copy pat)))) |
480 | |
481 | (transform |
482 | (lambda (form literals rules) |
483 | (letrec |
484 | ((try-rules |
485 | (lambda (rules) |
486 | (if (null? rules) (cerror "no matching syntax rule:" form)) |
487 | ;; rule is ((keyword . pattern) template) |
488 | ((lambda (dict) |
489 | (if dict (instantiate dict (cadar rules)) |
490 | (try-rules (cdr rules)))) |
491 | (match literals '() form (cdaar rules)))))) |
492 | (try-rules rules)))) |
493 | (match |
494 | (lambda (literals dict form pattern) |
495 | (if (symbol? pattern) |
496 | (if (memq pattern literals) |
497 | (if (eq? form pattern) dict #f) |
498 | (dict-extend dict pattern form)) |
499 | (if (pair? pattern) |
500 | (if (pair? form) |
501 | ((lambda (dict) |
502 | (if dict (match literals dict (cdr form) (cdr pattern)) |
503 | #f)) |
504 | (match literals dict (car form) (car pattern))) |
505 | #f) |
506 | (if (eq? form pattern) dict |
507 | #f))))) |
508 | (instantiate |
509 | (lambda (dict template) |
510 | (if (symbol? template) (dict-get dict template template) |
511 | (if (pair? template) (cons (instantiate dict (car template)) |
512 | (instantiate dict (cdr template))) |
513 | template))))) |
514 | |
515 | (set! deep-copy deep-copy/immutable) ;; distinction not currently necessary |
516 | |
517 | (set! builtin-syn-env |
518 | (list (cons 'lambda compile-lambda) |
519 | (cons 'quote compile-quote) |
520 | (cons 'if compile-if) |
521 | (cons 'set! compile-set) |
522 | (cons 'begin compile-begin) |
523 | (cons 'let compile-let) |
524 | (cons 'do compile-do) |
525 | (cons 'letrec compile-letrec) |
526 | (cons 'delay compile-delay) |
527 | (cons 'define compile-bad-define) |
528 | (cons 'define-syntax compile-bad-define-syntax))) |
529 | compile-expr-or-def) |