Projects : gscm : gscm_usrbin

gscm/src/gscm.c

Dir - Raw

1/**************
2 * Gales Scheme
3 *
4 * A Scheme interpreter for Unix striving for simplicity, soundness, minimal
5 * artificial restrictions, and R5RS compliance with strict error checking.
6 *
7 * J. Welsh
8 * January 2017 - April 2018
9 */
10
11#include <errno.h>
12#include <limits.h>
13#include <math.h>
14#include <setjmp.h>
15
16#include <fcntl.h>
17#include <unistd.h>
18#include <poll.h>
19#include <sys/mman.h>
20#include <sys/wait.h>
21
22#include <sys/socket.h>
23#include <sys/un.h>
24#include <netinet/in.h>
25
26#ifndef MAP_ANON
27#define MAP_ANON MAP_ANONYMOUS
28#endif
29
30int snprintf(char *, size_t, const char *, ...); /* to be replaced */
31void abort(void);
32size_t strlen(const char *);
33char *strerror(int);
34void *memcpy(void *, const void *, size_t);
35void *memset(void *, int, size_t);
36int memcmp(const void *, const void *, size_t);
37pid_t vfork(void);
38
39#include "gscm.h"
40
41
42/******************
43 * Memory structure
44 */
45
46/* The Scheme heap is an array of N-bit cells where N is the size of a machine
47 * address. */
48
49typedef size_t value;
50typedef value (*builtin_func_t)(value args);
51typedef unsigned char uchar;
52typedef unsigned long ulong;
53typedef unsigned int uint;
54
55/* Principal type tag: three most significant bits of cell */
56#define TAG_BITS 3
57
58#define T_SPECIAL 0 /* Special values listed below */
59#define T_MOVED 1 /* "Broken heart" pointer to GC moved object */
60#define T_IMMUT_PAIR 2 /* Pointer to car with cdr following */
61#define T_PAIR 3
62#define T_CHARACTER 4 /* Character in least significant byte */
63#define T_FIXNUM 5 /* N-3 bit two's complement signed integer */
64#define T_EXTENDED 6 /* Pointer to extended object */
65#define T_EXT_HEADER 7 /* Extended type header */
66
67/* Special values indicated by T_SPECIAL. Since that's zero, these can be
68 * compared with values directly. */
69#define SC_NULL 0
70#define SC_TRUE 1
71#define SC_FALSE 2
72#define SC_EOF 3
73#define SC_NULL_ENV 4
74#define SC_REPORT_ENV 5
75#define SC_GSCM_ENV 6
76#define SC_INTERACT_ENV 7
77#define SC_TOPLEVEL_ENV 8
78/* Inaccessible from Scheme */
79#define UNDEFINED 9
80#define RD_CLOSEPAREN 10 /* Returned internally by reader subroutines */
81#define RD_DOT 11
82
83/* T_SPECIAL is also implicitly (ab)used for return addresses (EV_*, RD_* and
84 * so on) and loop counters on the stack. GC doesn't have to know what they
85 * really are as long as it treats them as immediate values. */
86
87/* Extended objects consist of a header cell (T_EXT_HEADER) containing extended
88 * type information followed by possibly untagged data cells, depending on
89 * type. The four bits following the principal tag in the header are the
90 * extended type tag: */
91#define T_IMMUT_STRING 0x0
92#define T_STRING 0x1
93#define T_IMMUT_VECTOR 0x2
94#define T_VECTOR 0x3
95#define T_VARIABLE_REF 0x4
96#define T_SYMBOL 0x5
97#define T_BUILTIN 0x6
98#define T_PROCEDURE 0x7
99#define T_CONTINUATION 0x8
100#define T_PROMISE 0x9
101#define T_PORT 0xA
102#define T_FLONUM 0xB /* is_number assumes all numbers from here */
103#define T_BIGNUM 0xC
104#define T_NEG_BIGNUM 0xD
105#define T_RATIONAL 0xE
106#define T_COMPLEX 0xF
107
108/* Tags for types with immutable variants, both principal and extended, must
109 * be equal to the bitwise OR of 1 with the immutable variant. That is, the
110 * least significant tag bit is the mutability flag, where applicable. */
111
112/* Symbols, strings, vectors, and bignums store their length in the header as
113 * an N-7 bit unsigned integer. For vectors and bignums, that many cells
114 * follow. Strings and symbols are packed, so ceil(length/(N/8)) cells follow.
115 * Lexical variable references store the argument index in this space.
116 *
117 * Example for 32-bit systems:
118 * - Pointers/fixnums have 29 bits
119 * - Max heap size is 2^29 = 512M cells of 4 bytes = 2 GiB (4 during GC)
120 * - Longest string is 2^25 characters = 32 MiB
121 * - Longest vector is 2^25 cells = 128 MiB (not counting any pointer targets)
122 * - Longest bignum is 2^25 cells = 2^30 bits for a magnitute ~ 10^10^8
123 *
124 * If the size limits are a problem, the length could be stored in an untagged
125 * or fixnum cell after the header. */
126
127#if __SIZEOF_POINTER__ == 8
128#define VAL_BITS 61
129#define EXT_VAL_BITS 57
130#define FIXNUM_MAX 0x0FFFFFFFFFFFFFFF
131#define FIXNUM_MIN -0x1000000000000000
132#define EXT_LENGTH_MAX 0x01FFFFFFFFFFFFFF
133#define packed_str_len(bytes) (((bytes) + 7) >> 3)
134#define FLONUM_CELLS 1
135
136#elif __SIZEOF_POINTER__ == 4
137#define VAL_BITS 29
138#define EXT_VAL_BITS 25
139#define FIXNUM_MAX 0x0FFFFFFF
140#define FIXNUM_MIN -0x10000000
141#define EXT_LENGTH_MAX 0x01FFFFFF
142#define packed_str_len(bytes) (((bytes) + 3) >> 2)
143#define FLONUM_CELLS 2
144
145#else
146#error Unsupported pointer size
147#endif
148
149#define tag(v) (((value)(v)) >> VAL_BITS)
150#define add_tag(v, t) ((v) | (((value)(t)) << VAL_BITS))
151#define untag(v) ((((value)(v)) << 3) >> 3)
152#define untag_signed(v) (((long) (((value)(v)) << 3)) >> 3)
153#define ext_tag(v) (((v) >> EXT_VAL_BITS) & 0xF)
154#define ext_add_tag(v, t) ((v) | ((value)(t) << EXT_VAL_BITS) | \
155 (((value)T_EXT_HEADER) << VAL_BITS))
156#define ext_untag(v) ((((value)(v)) << 7) >> 7)
157#define ext_untag_signed(v) (((long) (((value)(v)) << 7)) >> 7)
158/* WARNING: add_tag/ext_add_tag assume v's tag bits are zero */
159
160static value car(value);
161static value cdr(value);
162
163
164/******************
165 * Scheme registers
166 */
167
168/* General purpose */
169static value r0, r1, r2, r3, r4, r5, r6;
170/* Special purpose */
171static value r_stack, r_spool, r_error_cont, r_signal_handler, r_compiler,
172 r_compiler_expr, r_input_port, r_output_port, r_dump;
173static enum {
174 f_none,
175 f_compile,
176 f_apply,
177 f_force,
178 f_call_with_values,
179 f_values,
180} r_flag;
181
182/* Register aliases to make usage more readable. Some rules for validation:
183 * - A subroutine may use a single register under different aliases, but before
184 * it is read or used as an argument under one alias, it must have been:
185 * - Assigned or declared as a parameter under the same alias, and
186 * - Not meanwhile assigned under a different alias.
187 * - Parameter registers must be distinct.
188 */
189#define R_EXPR r0 /* expression being evaluated */
190#define R_ARGS r0 /* arguments to apply procedure to */
191
192#define R_ENV r1 /* evaluation environment */
193#define R_PROC r1 /* procedure to apply */
194#define R_PORT r1 /* argument to I/O routines */
195#define R_ARG r1
196
197#define R_RESULT r2 /* subroutine return value */
198#define R_LEXEME r2
199#define R_FORMALS r2
200#define R_WIND_TO r2
201
202#define R_VARNAME r3
203#define R_TAIL r3 /* last pair of a list being built */
204#define R_LCA r3
205
206#define R_OPERANDS r4
207#define R_SECOND_LAST r4
208
209#define R_CAR r5 /* argument to cons or push */
210
211#define R_CDR r6 /* argument to cons */
212#define R_ITER r6
213
214
215/*****************
216 * Syscall helpers
217 */
218
219static int open_cloexec(const char *path, int flags) {
220 return open(path, flags | O_CLOEXEC, 0666);
221 /* Non-atomic version for systems lacking O_CLOEXEC
222 int fd = open(path, flags, 0666);
223 if (fd != -1) fcntl(fd, F_SETFD, FD_CLOEXEC);
224 return fd;
225 */
226}
227
228static int pipe_cloexec(int pipefd[2]) {
229 return pipe2(pipefd, O_CLOEXEC);
230 /* Non-atomic version for systems lacking pipe2
231 if (pipe(pipefd) == -1) return -1;
232 fcntl(pipefd[0], F_SETFD, FD_CLOEXEC);
233 fcntl(pipefd[1], F_SETFD, FD_CLOEXEC);
234 return 0;
235 */
236}
237
238/* Reliably catching close errors is NOT POSSIBLE on Linux and others. The call
239 * may block and be interrupted by a signal handler, yet cannot be retried as
240 * the FD is deallocated early. HPUX at least has the atypical behavior of
241 * leaving the FD open, so it would leak. Should figure out where exactly close
242 * can block. */
243static void blind_close(int fd) {
244 int saved_errno = errno;
245 close(fd);
246 errno = saved_errno;
247}
248
249static int poll1(int fd, short events, int timeout) {
250 int r;
251 struct pollfd sp;
252 sp.fd = fd;
253 sp.events = events;
254 while ((r = poll(&sp, 1, timeout)) == -1)
255 if (errno != EAGAIN && errno != EINTR) sc_perror();
256 return r;
257}
258
259static int write_all(int fd, const char *buf, ssize_t len) {
260 ssize_t n;
261 while ((n = write(fd, buf, len)) < len) {
262 if (n != -1) len -= n, buf += n;
263 else if (errno == EAGAIN || errno == EWOULDBLOCK)
264 poll1(fd, POLLOUT, -1);
265 else if (errno != EINTR) return -1;
266 }
267 return 0;
268}
269
270void sc_write_error(const char *msg) {
271 size_t len = strlen(msg);
272 if (len) write_all(2, msg, len);
273}
274#define write_err sc_write_error
275
276static void flush_all(void);
277
278__attribute__((noreturn))
279void sc_exit(int status) {
280 flush_all();
281 _exit(status);
282}
283
284
285/****************
286 * Error handling
287 */
288
289/* Failsafe error handler */
290
291__attribute__((noreturn))
292static void fatal(const char *msg) {
293 write_err("FATAL: ");
294 write_err(msg);
295 write_err("\n");
296 sc_exit(1);
297}
298
299__attribute__((noreturn))
300static void fatal1(const char *msg, const char *detail) {
301 write_err("FATAL: ");
302 write_err(msg);
303 write_err(": ");
304 write_err(detail);
305 write_err("\n");
306 sc_exit(1);
307}
308
309__attribute__((noreturn))
310void sc_error(const char *msg) { sc_error1(msg, UNDEFINED); }
311
312__attribute__((noreturn))
313void sc_perror(void) { sc_error(strerror(errno)); }
314
315__attribute__((noreturn))
316void sc_perror1(value detail) { sc_error1(strerror(errno), detail); }
317
318static int chkp(int r) { if (r == -1) sc_perror(); return r; }
319
320static const char *fmt_ulong_dec(ulong);
321
322__attribute__((noreturn))
323void sc_assert_fail(const char *file, ulong line, const char *func,
324 const char *expr) {
325 const char *sep = ": ";
326 static int aborting = 0;
327 if (!aborting) flush_all();
328 aborting = 1;
329 write_err("Assertion failed: ");
330 write_err(file); write_err(sep);
331 write_err(fmt_ulong_dec(line)); write_err(sep);
332 write_err(func); write_err(sep);
333 write_err(expr); write_err("\n");
334 abort();
335}
336
337/* various common errors */
338
339__attribute__((noreturn))
340static void not_a_number(value v) { sc_error1("not a number:", v); }
341
342
343/*******************************
344 * Garbage collector & allocator
345 */
346
347/* Heap discipline:
348 *
349 * This garbage collector uses the stop-and-copy (Minsky-Fenichel-Yochelson)
350 * method. Because it relocates values into a new heap and is triggered by
351 * allocation, any function that directly or indirectly calls sc_malloc cannot
352 * keep pointer types (T_PAIR, T_IMMUT_PAIR, T_EXTENDED) in local variables
353 * across such calls, as the addresses may be invalidated. The Scheme stack,
354 * registers, or otherwise statically stored variables registered as roots must
355 * be used instead.
356 *
357 * Such functions will generally be constructors and take their arguments
358 * through the stack or registers. Notably included are push and cons.
359 * Specifically not included are pop, peek, drop, car, cdr, set_car and
360 * set_cdr.
361 *
362 * The reward for this trouble is fast and compacting garbage collection.
363 */
364
365static value *heap, *new_heap;
366static value heap_size, free_ptr;
367
368#define ROOTS_ALLOC 48
369static value *roots[ROOTS_ALLOC];
370static value roots_fill;
371
372static void gc_root(value *handle) {
373 if (roots_fill >= ROOTS_ALLOC) fatal("insufficient ROOTS_ALLOC");
374 roots[roots_fill] = handle;
375 ++roots_fill;
376}
377
378static value ext_obj_size(value header) {
379 switch (ext_tag(header)) {
380 case T_IMMUT_STRING:
381 case T_STRING: return 1 + packed_str_len(ext_untag(header));
382 case T_IMMUT_VECTOR:
383 case T_VECTOR: return 1 + ext_untag(header);
384 case T_VARIABLE_REF: return 2;
385 case T_SYMBOL: return 1 + packed_str_len(ext_untag(header));
386 case T_BUILTIN: return 3;
387 case T_PROCEDURE: return 4;
388 case T_CONTINUATION: return 3;
389 case T_PROMISE: return 3;
390 case T_PORT: return 6;
391 case T_FLONUM: return 1 + FLONUM_CELLS;
392 case T_BIGNUM:
393 case T_NEG_BIGNUM: return 1 + ext_untag(header);
394 case T_RATIONAL: return 3;
395 case T_COMPLEX: return 3;
396 default: fatal("BUG: invalid extended tag");
397 }
398}
399
400/* Process one cell (in either a root or the new heap), returning number of
401 * cells to advance */
402static value scan_cell(value *scan_val) {
403 int scan_tag = tag(*scan_val);
404 value ptr, old_val, length;
405 assert(scan_tag != T_MOVED);
406 switch (scan_tag) {
407 case T_IMMUT_PAIR:
408 case T_PAIR:
409 case T_EXTENDED:
410 ptr = untag(*scan_val);
411 old_val = heap[ptr];
412 if (tag(old_val) == T_MOVED)
413 *scan_val = add_tag(untag(old_val), scan_tag);
414 else {
415 *scan_val = add_tag(free_ptr, scan_tag);
416 length = (scan_tag == T_EXTENDED) ? ext_obj_size(old_val) : 2;
417 memcpy(&new_heap[free_ptr], &heap[ptr], length*sizeof(value));
418 heap[ptr] = add_tag(free_ptr, T_MOVED);
419 free_ptr += length;
420 }
421 return 1;
422 case T_EXT_HEADER:
423 switch (ext_tag(*scan_val)) {
424 /* For compound types, skip the header and scan each element */
425 case T_IMMUT_VECTOR:
426 case T_VECTOR:
427 case T_VARIABLE_REF:
428 case T_PROCEDURE:
429 case T_CONTINUATION:
430 case T_PROMISE:
431 case T_PORT:
432 case T_RATIONAL:
433 case T_COMPLEX:
434 return 1;
435 /* Otherwise skip the whole blob */
436 default:
437 return ext_obj_size(*scan_val);
438 }
439 default:
440 /* All other principal types are immediate values */
441 return 1;
442 }
443}
444
445uint sc_gc_verbose = 0, sc_gc_thrash_factor = 16;
446
447void sc_gc(void) {
448 value root, scan_ptr, *tmp;
449 if (sc_gc_verbose) {
450 static ulong gc_count = 0;
451 write_err("GC: cycle ");
452 write_err(fmt_ulong_dec(++gc_count));
453 write_err(" | ");
454 }
455 free_ptr = 0;
456 for (root = 0; root < roots_fill; ++root) scan_cell(roots[root]);
457 for (scan_ptr = 0; scan_ptr < free_ptr;
458 scan_ptr += scan_cell(&new_heap[scan_ptr]))
459 assert(free_ptr <= heap_size);
460 tmp = heap;
461 heap = new_heap;
462 new_heap = tmp;
463 if (sc_gc_verbose) {
464 /* using floating point to avoid overflow */
465 double live_bytes = free_ptr*sizeof(value);
466 double live_pct = 100.*free_ptr/heap_size;
467 write_err(fmt_ulong_dec(free_ptr));
468 write_err(" cells | ");
469 write_err(fmt_ulong_dec((live_bytes+1023.)/1024.));
470 write_err("K | ");
471 write_err(fmt_ulong_dec(live_pct));
472 write_err(".");
473 write_err(fmt_ulong_dec(((unsigned)(10.*live_pct))%10));
474 write_err("% live\n");
475 }
476}
477
478static value sc_malloc(size_t cells) {
479 value result = free_ptr;
480 free_ptr += cells;
481 if (free_ptr > heap_size) {
482 sc_gc();
483 result = free_ptr;
484 free_ptr += cells;
485 if (free_ptr > (heap_size - heap_size/sc_gc_thrash_factor)) {
486 /* Clear registers in hopes of freeing space. While not guaranteed,
487 * this can help simple cases like recovering the REPL after a
488 * runaway recursion. */
489 r0 = r1 = r2 = r3 = r4 = r5 = r6 = r_stack = SC_NULL;
490 sc_error("out of memory");
491 }
492 }
493 return result;
494}
495
496
497/*************************
498 * Scheme stack operations
499 */
500
501/* Push R_CAR onto the stack (no other side effects) */
502static void push(void) {
503 value new_stack = sc_malloc(2);
504 heap[new_stack] = R_CAR;
505 heap[new_stack+1] = r_stack;
506 r_stack = add_tag(new_stack, T_PAIR);
507}
508
509/* Shorthand to push an arbitrary value */
510#define PUSH(val) { R_CAR = (val); push(); }
511
512/* Remove the top of the stack */
513static void drop(void) {
514 r_stack = cdr(r_stack);
515}
516
517/* Return the top of the stack */
518static value peek(void) {
519 return car(r_stack);
520}
521
522/* Remove and return the top of the stack */
523static value pop(void) {
524 value v = car(r_stack);
525 r_stack = cdr(r_stack);
526 return v;
527}
528
529
530/***************************************************
531 * Builtin type constructors, predicates & accessors
532 */
533
534static int is_ext_type(value v, uint t) {
535 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == t;
536}
537
538static int is_mutable(value v) {
539 int t = tag(v);
540 if (t != T_EXTENDED) return t == T_PAIR;
541 t = ext_tag(heap[untag(v)]);
542 return t == T_STRING || t == T_VECTOR;
543}
544
545/* Booleans */
546
547static value boolean(int b) { return b ? SC_TRUE : SC_FALSE; }
548static int is_boolean(value v) { return v == SC_TRUE || v == SC_FALSE; }
549
550/* Pairs & lists */
551
552/* Return a new pair from the values of R_CAR and R_CDR */
553static value cons(void) {
554 value p = sc_malloc(2);
555 heap[p] = R_CAR;
556 heap[p+1] = R_CDR;
557 return add_tag(p, T_PAIR);
558}
559static value cons_immutable(void) {
560 value p = sc_malloc(2);
561 heap[p] = R_CAR;
562 heap[p+1] = R_CDR;
563 return add_tag(p, T_IMMUT_PAIR);
564}
565static int is_pair(value v) { return (tag(v) | 1) == T_PAIR; }
566static value car(value p) {
567 assert(is_pair(p));
568 return heap[untag(p)];
569}
570static value cdr(value p) {
571 assert(is_pair(p));
572 return heap[untag(p)+1];
573}
574static void set_car(value p, value v) {
575 assert(is_pair(p));
576 heap[untag(p)] = v;
577}
578static void set_cdr(value p, value v) {
579 assert(is_pair(p));
580 heap[untag(p)+1] = v;
581}
582static value safe_car(value p) {
583 if (!is_pair(p)) sc_error1("not a pair:", p);
584 return car(p);
585}
586static value safe_cdr(value p) {
587 if (!is_pair(p)) sc_error1("not a pair:", p);
588 return cdr(p);
589}
590#define cadr(x) car(cdr(x))
591
592/* Safely compute the length of a list, returning -1 if not a proper list */
593static long safe_list_length(value v) {
594 /* Floyd's cycle-finding algorithm */
595 value slow = v, fast = v, length = 0;
596 while (is_pair(fast)) {
597 slow = cdr(slow);
598 fast = cdr(fast);
599 length++;
600 if (!is_pair(fast)) break;
601 fast = cdr(fast);
602 if (fast == slow) return -1; /* cycle */
603 length++;
604 }
605 if (fast != SC_NULL) return -1; /* improper list or not a pair */
606 return length;
607}
608static int is_list(value v) { return safe_list_length(v) >= 0; }
609
610/* Compute the length of a proper list */
611static value list_length(value l) {
612 value length = 0;
613 for (; l != SC_NULL; l = cdr(l)) length++;
614 return length;
615}
616
617/* Find the first node shared by two proper lists; that is, the LCA of two
618 * nodes in the parent-pointer tree rooted at the empty list. */
619static value lowest_common_ancestor(value a, value b) {
620 value al = list_length(a), bl = list_length(b);
621 if (al != bl) {
622 if (al > bl)
623 do a = cdr(a), --al; while (al > bl);
624 else
625 do b = cdr(b), --bl; while (bl > al);
626 }
627 while (a != b) a = cdr(a), b = cdr(b);
628 return a;
629}
630
631/* Numbers */
632
633static value fixnum_zero, fixnum_one;
634
635/* Not bounds checked! */
636static value fixnum(long n) { return add_tag(untag(n), T_FIXNUM); }
637static int is_fixnum(value v) { return tag(v) == T_FIXNUM; }
638static long fixnum_val(value v) {
639 assert(is_fixnum(v));
640 return untag_signed(v);
641}
642static ulong unsigned_fixnum_val(value v) {
643 assert(is_fixnum(v));
644 return untag(v);
645}
646static long safe_fixnum_val(value v) {
647 if (is_fixnum(v)) return untag_signed(v);
648 sc_error1("not an integer or out of bounds:", v);
649}
650
651static value flonum(double x) {
652 value f = sc_malloc(1 + FLONUM_CELLS);
653 heap[f] = ext_add_tag(0, T_FLONUM);
654 /* strict aliasing?
655 *((double *)&heap[f+1]) = x; */
656 memcpy(&heap[f+1], &x, sizeof x);
657 return add_tag(f, T_EXTENDED);
658}
659static int is_flonum(value v) { return is_ext_type(v, T_FLONUM); }
660static double flonum_val(value f) {
661 /* strict aliasing?
662 return *((double *)&heap[untag(f)+1]); */
663 double x;
664 assert(is_flonum(f));
665 memcpy(&x, &heap[untag(f)+1], sizeof x);
666 return x;
667}
668
669static value make_bignum_uninit(value len, int neg) {
670 value ptr;
671 if (len > EXT_LENGTH_MAX) sc_error("length too large for bignum");
672 ptr = sc_malloc(1 + len);
673 heap[ptr] = ext_add_tag(len, T_BIGNUM | neg);
674 return add_tag(ptr, T_EXTENDED);
675}
676static int is_bignum(value v) {
677 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) ==
678 T_NEG_BIGNUM;
679}
680static value bignum_len(value n) {
681 assert(is_bignum(n));
682 return ext_untag(heap[untag(n)]);
683}
684static value bignum_ref(value n, value k) {
685 assert(k < bignum_len(n));
686 return heap[untag(n)+k+1];
687}
688static void bignum_set(value n, value k, value word) {
689 assert(k < bignum_len(n));
690 assert(is_fixnum(word));
691 heap[untag(n)+k+1] = word;
692}
693static int is_bignum_negative(value n) {
694 assert(is_bignum(n));
695 return ext_tag(heap[untag(n)]) & 1;
696}
697static value bignum_set_negative(value n) {
698 assert(is_bignum(n));
699 heap[untag(n)] |= (1UL << EXT_VAL_BITS);
700 return n;
701}
702/* Truncate bignum in place (consider carefully how GC works) */
703static value bignum_truncate(value n, value len) {
704 assert(len <= bignum_len(n));
705 value ptr = untag(n);
706 heap[ptr] = ext_add_tag(len, ext_tag(heap[ptr]));
707 return n;
708}
709
710static int is_rational(value v) { return is_ext_type(v, T_RATIONAL); }
711
712static int is_exact(value v) {
713 return is_fixnum(v) || is_bignum(v) || is_rational(v);
714}
715static int is_number(value v) {
716 return is_fixnum(v) ||
717 (tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) >= T_FLONUM);
718}
719static int is_integer(value v) {
720 if (is_fixnum(v) || is_bignum(v)) return 1;
721 if (is_flonum(v)) {
722 double f = flonum_val(v);
723 return f == nearbyint(f);
724 }
725 return 0;
726}
727
728/* Characters */
729
730static value character(uchar c) { return add_tag(c, T_CHARACTER); }
731static int is_character(value v) { return tag(v) == T_CHARACTER; }
732static uchar safe_char_val(value c) {
733 if (!is_character(c)) sc_error1("not a character:", c);
734 return (uchar)c;
735}
736#define char_val(c) ((uchar)(c))
737
738/* Convert ASCII characters to upper/lowercase */
739static uchar uc(uchar c) {
740 if (c >= 'a' && c <= 'z') return c - 0x20;
741 return c;
742}
743static uchar lc(uchar c) {
744 if (c >= 'A' && c <= 'Z') return c + 0x20;
745 return c;
746}
747
748/* Strings */
749
750static value alloc_string(value len) {
751 if (len > EXT_LENGTH_MAX)
752 sc_error("length negative or too large for string");
753 return sc_malloc(1 + packed_str_len(len));
754}
755static value make_string_uninit(value len) {
756 value ptr = alloc_string(len);
757 heap[ptr] = ext_add_tag(len, T_STRING);
758 return add_tag(ptr, T_EXTENDED);
759}
760static value make_immutable_string(value len) {
761 value ptr = alloc_string(len);
762 heap[ptr] = ext_add_tag(len, T_IMMUT_STRING);
763 return add_tag(ptr, T_EXTENDED);
764}
765static int is_string(value v) {
766 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) == T_STRING;
767}
768static int is_mutable_string(value v) {
769 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == T_STRING;
770}
771static int is_symbol(value);
772static uchar * string_buf(value s) {
773 assert(is_string(s) || is_symbol(s));
774 return (uchar *)&heap[untag(s)+1];
775}
776/* C thinks strings are made of signed chars for some reason... */
777static char * c_string_buf(value s) {
778 assert(is_string(s) || is_symbol(s));
779 return (char *)string_buf(s);
780}
781static value string_len(value s) {
782 assert(is_string(s) || is_symbol(s));
783 return ext_untag(heap[untag(s)]);
784}
785/* Construct string from null-terminated C string not on the Scheme heap */
786static value string(const char *c_str) {
787 value len = strlen(c_str);
788 value str = make_string_uninit(len);
789 memcpy(string_buf(str), c_str, len);
790 return str;
791}
792static value make_string(value len, uchar fill) {
793 value s = make_string_uninit(len);
794 memset(string_buf(s), fill, len);
795 return s;
796}
797/* Construct immutable copy of string or symbol in R_EXPR */
798static value string_copy_immutable(void) {
799 value len = string_len(R_EXPR), ptr = alloc_string(len);
800 heap[ptr] = ext_add_tag(len, T_IMMUT_STRING);
801 memcpy(heap+ptr+1, string_buf(R_EXPR), len);
802 return add_tag(ptr, T_EXTENDED);
803}
804/* Construct copy of string in R_EXPR */
805static value string_copy(void) {
806 value len = string_len(R_EXPR);
807 value result = make_string_uninit(len);
808 memcpy(string_buf(result), string_buf(R_EXPR), len);
809 return result;
810}
811/* Construct copy of string in R_EXPR with null byte appended */
812static value string_append_null(void) {
813 value len = string_len(R_EXPR);
814 value result = make_string_uninit(len + 1);
815 uchar *buf = string_buf(result);
816 memcpy(buf, string_buf(R_EXPR), len);
817 buf[len] = '\0';
818 return result;
819}
820/* Truncate string in place (consider carefully how GC works) */
821static void string_truncate(value s, value len) {
822 assert(len <= string_len(s));
823 value ptr = untag(s);
824 heap[ptr] = ext_add_tag(len, ext_tag(heap[ptr]));
825}
826
827/* Symbols */
828
829static value symbols; /* interning list */
830
831/* Frequently used symbols */
832static value s_lambda, s_quote, s_quasiquote, s_unquote, s_unquote_splicing,
833 s_if, s_set, s_begin, s_letrec, s_define, s_delay, s_literal,
834 s_open_paren, s_close_paren, s_dot, s_open_vector, s_identifier,
835 s_named_char, s_abbrev, s_number, s_truncate, s_overwrite,
836 s_append, s_sync, s_data_sync;
837
838static value find_symbol(const uchar *buf, value len) {
839 value iter, sym;
840 /* some type checks skipped because interning list is not (directly) user
841 * modifiable */
842 for (iter = symbols; iter != SC_NULL; iter = cdr(iter)) {
843 sym = car(iter);
844 if (len == ext_untag(heap[untag(sym)]) &&
845 memcmp(buf, &heap[untag(sym)+1], len) == 0)
846 return sym;
847 }
848 return SC_NULL;
849}
850/* Get symbol from a null-terminated C string not on the Scheme heap, not
851 * converting case (side effects: R_CAR R_CDR) */
852static value symbol(const char *c_str) {
853 value len = strlen(c_str);
854 value sym = find_symbol((uchar *)c_str, len);
855 if (sym != SC_NULL) return sym;
856 value sym_ptr = sc_malloc(1 + packed_str_len(len));
857 heap[sym_ptr] = ext_add_tag(len, T_SYMBOL);
858 memcpy(&heap[sym_ptr+1], c_str, len);
859 R_CAR = add_tag(sym_ptr, T_EXTENDED);
860 R_CDR = symbols;
861 symbols = cons();
862 return R_CAR;
863}
864/* Get symbol from a Scheme string in R_CAR, not converting case
865 * (side effects: R_CAR R_CDR) */
866static value string_to_symbol(void) {
867 value len = string_len(R_CAR);
868 value sym = find_symbol(string_buf(R_CAR), len);
869 if (sym != SC_NULL) return sym;
870 value sym_ptr = sc_malloc(1 + packed_str_len(len));
871 heap[sym_ptr] = ext_add_tag(len, T_SYMBOL);
872 memcpy(&heap[sym_ptr+1], string_buf(R_CAR), len);
873 R_CAR = add_tag(sym_ptr, T_EXTENDED);
874 R_CDR = symbols;
875 symbols = cons();
876 return R_CAR;
877}
878static int is_symbol(value v) { return is_ext_type(v, T_SYMBOL); }
879
880/* Vectors */
881
882static value alloc_vector(value len) {
883 if (len > EXT_LENGTH_MAX)
884 sc_error("length negative or too large for vector");
885 return sc_malloc(1 + len);
886}
887/* Uninitialized constructors: caller must fill without further allocation */
888static value make_vector_uninit(value len) {
889 value vec = alloc_vector(len);
890 heap[vec] = ext_add_tag(len, T_VECTOR);
891 return add_tag(vec, T_EXTENDED);
892}
893static value make_immutable_vector(value len) {
894 value vec = alloc_vector(len);
895 heap[vec] = ext_add_tag(len, T_IMMUT_VECTOR);
896 return add_tag(vec, T_EXTENDED);
897}
898/* Build a new vector with each element initialized to R_EXPR */
899static value make_vector(value len) {
900 value vec = make_vector_uninit(len), *p;
901 for (p = heap+untag(vec)+1; len; --len, ++p) *p = R_EXPR;
902 return vec;
903}
904/* Build a new vector by reversing the elements of proper list R_EXPR */
905static value rev_list_to_vec(void) {
906 value len = list_length(R_EXPR),
907 vec = make_vector_uninit(len),
908 *p = heap+untag(vec)+len;
909 for (; R_EXPR != SC_NULL; --p, R_EXPR = cdr(R_EXPR)) *p = car(R_EXPR);
910 return vec;
911}
912static int is_vector(value v) {
913 return tag(v) == T_EXTENDED && (ext_tag(heap[untag(v)]) | 1) == T_VECTOR;
914}
915static int is_mutable_vector(value v) {
916 return tag(v) == T_EXTENDED && ext_tag(heap[untag(v)]) == T_VECTOR;
917}
918static value vector_len(value v) {
919 assert(is_vector(v));
920 return ext_untag(heap[untag(v)]);
921}
922static value vector_ref(value v, value k) {
923 assert(k < vector_len(v));
924 return heap[untag(v)+k+1];
925}
926static void vector_set(value v, value k, value obj) {
927 assert(k < vector_len(v));
928 heap[untag(v)+k+1] = obj;
929}
930
931/* Builtin procedures */
932
933static value builtin(const char *name, builtin_func_t func) {
934 value b = sc_malloc(3);
935 heap[b] = ext_add_tag(0, T_BUILTIN);
936 heap[b+1] = (value)name;
937 heap[b+2] = (value)func;
938 return add_tag(b, T_EXTENDED);
939}
940static int is_builtin(value v) { return is_ext_type(v, T_BUILTIN); }
941static const char * builtin_name(value b) {
942 return (char *)heap[untag(b)+1];
943}
944static builtin_func_t builtin_func(value b) {
945 return (builtin_func_t)heap[untag(b)+2];
946}
947
948/* Compound procedures */
949
950/* Return a new procedure object from lambda expression operands in R_OPERANDS
951 * and environment in R_ENV.
952 * Side effects: R_OPERANDS R_CAR R_CDR */
953static value procedure(void) {
954 value p, arity;
955 arity = car(R_OPERANDS);
956 if (is_fixnum(arity)) {
957 /* Compiler annotated parameter list attributes to save a traversal */
958 R_OPERANDS = cdr(R_OPERANDS);
959 }
960 else {
961 /* ...this traversal (still needed for bootstrapping), which in turn
962 * saves traversing each time the procedure is applied */
963 p = arity; /* parameter list */
964 arity = 0;
965 for (; is_pair(p); p = cdr(p)) arity++;
966 if (p == SC_NULL) arity = fixnum(arity);
967 else {
968 /* improper (variadic) */
969 assert(is_symbol(p));
970 arity = (value)(-1L - (long)arity);
971 }
972 }
973 p = sc_malloc(4);
974 heap[p] = ext_add_tag(ext_untag(arity), T_PROCEDURE);
975 heap[p+1] = car(R_OPERANDS); /* parameter list */
976 heap[p+2] = cdr(R_OPERANDS); /* body */
977 heap[p+3] = R_ENV;
978 return add_tag(p, T_EXTENDED);
979}
980static int is_compound_proc(value v) { return is_ext_type(v, T_PROCEDURE); }
981static long proc_arity(value p) { return ext_untag_signed(heap[untag(p)]); }
982static value proc_params(value p) { return heap[untag(p)+1]; }
983static value proc_body(value p) { return heap[untag(p)+2]; }
984static value proc_env(value p) { return heap[untag(p)+3]; }
985
986/* Continuations */
987
988static value current_continuation(void) {
989 value cont = sc_malloc(3);
990 heap[cont] = ext_add_tag(0, T_CONTINUATION);
991 heap[cont+1] = r_stack;
992 heap[cont+2] = r_spool;
993 return add_tag(cont, T_EXTENDED);
994}
995static int is_continuation(value v) { return is_ext_type(v, T_CONTINUATION); }
996static value continuation_stack(value c) { return heap[untag(c)+1]; }
997static value continuation_spool(value c) { return heap[untag(c)+2]; }
998
999static int is_procedure(value v) {
1000 return is_builtin(v) || is_compound_proc(v) || is_continuation(v);
1001}
1002
1003/* Promises */
1004
1005/* Construct a promise from an expression in R_EXPR and environment in R_ENV */
1006static value promise(void) {
1007 value p = sc_malloc(3);
1008 heap[p] = ext_add_tag(0, T_PROMISE);
1009 heap[p+1] = R_EXPR;
1010 heap[p+2] = R_ENV;
1011 return add_tag(p, T_EXTENDED);
1012}
1013static int is_promise(value v) { return is_ext_type(v, T_PROMISE); }
1014static int promise_done(value p) { return heap[untag(p)] & 1; }
1015static value promise_value(value p) { return heap[untag(p)+1]; }
1016static value promise_env(value p) { return heap[untag(p)+2]; }
1017static void promise_memoize(value p, value v) {
1018 value ptr = untag(p);
1019 heap[ptr] = ext_add_tag(1, T_PROMISE);
1020 heap[ptr+1] = v;
1021 heap[ptr+2] = SC_NULL; /* release to GC */
1022}
1023
1024/* Ports */
1025
1026static value stdin_port, stdout_port;
1027
1028#define DEFAULT_R_BUF 4096
1029#define DEFAULT_W_BUF 4096
1030
1031/* Flags in header */
1032#define PORT_OUTPUT_BIT 1
1033#define PORT_SOCKET_BIT 2
1034#define PORT_EOF_BIT 4
1035
1036/* Fields */
1037#define PORT_FD 1
1038#define PORT_START 2
1039#define PORT_FILL 3
1040#define PORT_BUF 4
1041#define PORT_COUNTERPART 5
1042
1043/* Construct unidirectional port. Side effects: R_RESULT */
1044static value make_port(int fd, int is_output, long buf_size) {
1045 value port, *p;
1046 if (buf_size < 1) sc_error("buffer size must be at least one");
1047 R_RESULT = make_string_uninit(buf_size);
1048 port = sc_malloc(6);
1049 p = heap+port;
1050 p[0] = ext_add_tag(is_output ? PORT_OUTPUT_BIT : 0, T_PORT);
1051 p[PORT_FD] = fixnum(fd);
1052 p[PORT_START] = fixnum(0);
1053 p[PORT_FILL] = fixnum(0);
1054 p[PORT_BUF] = R_RESULT;
1055 p[PORT_COUNTERPART] = SC_NULL;
1056 return add_tag(port, T_EXTENDED);
1057}
1058/* Construct input port in r0 and output port in r1 from socket file
1059 * descriptor. Side effects: R_RESULT */
1060static void make_socket_ports(int fd, value rbuf_size, value wbuf_size) {
1061 value *p;
1062 chkp(fcntl(fd, F_SETFL, O_NONBLOCK));
1063 r0 = make_port(fd, 0, rbuf_size);
1064 r1 = make_port(fd, 1, wbuf_size);
1065 /* Cross-reference the two directions so the underlying FD can be closed
1066 * promptly when both ports are. */
1067 p = heap+untag(r0);
1068 p[0] |= PORT_SOCKET_BIT;
1069 p[PORT_COUNTERPART] = r1;
1070 p = heap+untag(r1);
1071 p[0] |= PORT_SOCKET_BIT;
1072 p[PORT_COUNTERPART] = r0;
1073}
1074
1075static int is_port(value v) { return is_ext_type(v, T_PORT); }
1076static int is_input_port(value v) {
1077 value header;
1078 if (tag(v) != T_EXTENDED) return 0;
1079 header = heap[untag(v)];
1080 return ext_tag(header) == T_PORT && (header & PORT_OUTPUT_BIT) == 0;
1081}
1082static int is_output_port(value v) {
1083 value header;
1084 if (tag(v) != T_EXTENDED) return 0;
1085 header = heap[untag(v)];
1086 return ext_tag(header) == T_PORT && (header & PORT_OUTPUT_BIT) != 0;
1087}
1088
1089static int set_port_closed(value *p) {
1090 int fd = fixnum_val(p[PORT_FD]);
1091 /* Set an invalid FD so writes to a closed port are caught by the kernel
1092 * with no extra cost in the normal case. Disable buffering so they're
1093 * caught immediately. */
1094 p[PORT_FD] = fixnum(-1);
1095 p[PORT_START] = p[PORT_FILL] = fixnum(0);
1096 string_truncate(p[PORT_BUF], 1);
1097 if (p[PORT_COUNTERPART] == SC_NULL) return close(fd);
1098 heap[untag(p[PORT_COUNTERPART])+PORT_COUNTERPART] = SC_NULL;
1099 p[PORT_COUNTERPART] = SC_NULL;
1100 return 0;
1101}
1102static ssize_t fill_input_port(value *p, int nonblock) {
1103 int fd = fixnum_val(p[PORT_FD]);
1104 uchar *buf = string_buf(p[PORT_BUF]);
1105 value len = string_len(p[PORT_BUF]);
1106 ssize_t n;
1107 while ((n = read(fd, buf, len)) < 0) {
1108 if (errno == EINTR) continue;
1109 if (errno == EAGAIN || errno == EWOULDBLOCK) {
1110 if (nonblock) return -1;
1111 poll1(fd, POLLIN, -1); continue;
1112 }
1113 if (fd == -1) sc_error("input port closed");
1114 sc_perror();
1115 }
1116 p[PORT_START] = fixnum(0);
1117 p[PORT_FILL] = fixnum(n);
1118 return n;
1119}
1120static void flush_output_port(value *p) {
1121 int fd = fixnum_val(p[PORT_FD]);
1122 long fill = fixnum_val(p[PORT_FILL]);
1123 assert(fill > 0); /* zero-length write unspecified on non-regular files */
1124 assert((ulong)fill <= string_len(p[PORT_BUF]));
1125 p[PORT_FILL] = fixnum(0);
1126 if (write_all(fd, c_string_buf(p[PORT_BUF]), fill) == -1) {
1127 int saved;
1128 if (fd == -1) sc_error("output port closed");
1129 /* Probably no sensible way to recover from write errors, so force the
1130 * port closed. XXX Closing standard streams is a concern (i.e. a
1131 * subsequent open gets FD 1 or 2 and terminal output goes to the file
1132 * unexpectedly), except: 1) the interpreter writes to stdout through
1133 * the port object only; 2) the open-subprocess extension always pipes
1134 * the child's stdout; 3) there's no port for stderr. But these are
1135 * fragile assumptions. */
1136 saved = errno; set_port_closed(p); errno = saved;
1137 sc_perror();
1138 }
1139}
1140
1141static void flush_if_needed(value port) {
1142 value *p = heap+untag(port);
1143 if (fixnum_val(p[PORT_FILL]) > 0) flush_output_port(p);
1144}
1145static void close_port(value port) {
1146 value *p = heap+untag(port), header = p[0];
1147 int fd = fixnum_val(p[PORT_FD]);
1148 if (fd == -1) return;
1149 if (header & PORT_OUTPUT_BIT) flush_if_needed(port);
1150 if (header & PORT_SOCKET_BIT)
1151 shutdown(fd, header & PORT_OUTPUT_BIT ? SHUT_WR : SHUT_RD);
1152 chkp(set_port_closed(p));
1153}
1154static value read_char(value port) {
1155 value *p = heap+untag(port), start = p[PORT_START];
1156 uchar *buf = string_buf(p[PORT_BUF]);
1157 if (start == p[PORT_FILL]) {
1158 if (p[0] & PORT_EOF_BIT) { p[0] ^= PORT_EOF_BIT; return SC_EOF; }
1159 if (!fill_input_port(p, 0)) return SC_EOF;
1160 start = 0;
1161 }
1162 else start = untag(start);
1163 p[PORT_START] = fixnum(start+1);
1164 return character(buf[start]);
1165}
1166static value peek_char(value port) {
1167 value *p = heap+untag(port), start = p[PORT_START];
1168 uchar *buf = string_buf(p[PORT_BUF]);
1169 if (start == p[PORT_FILL]) {
1170 /* EOF is not always permanent, e.g. on a tty, so the condition must be
1171 * saved specially for the next peek or read. */
1172 if (p[0] & PORT_EOF_BIT) return SC_EOF;
1173 if (!fill_input_port(p, 0)) { p[0] |= PORT_EOF_BIT; return SC_EOF; }
1174 start = 0;
1175 }
1176 else start = untag(start);
1177 return character(buf[start]);
1178}
1179static value input_port_ready(value port) {
1180 value *p;
1181 int fd;
1182 p = heap+untag(port);
1183 fd = fixnum_val(p[PORT_FD]);
1184 if (p[PORT_START] < p[PORT_FILL]) return SC_TRUE;
1185 if (fd == -1) sc_error("input port closed");
1186 if (!poll1(fd, POLLIN, 0)) return SC_FALSE;
1187 /* XXX Linux poll/select are broken and have false positives for
1188 * readability, at least for sockets, so we try a nonblocking read. But
1189 * this doesn't work for regular files! Seems marginally better to break
1190 * "the next READ-CHAR operation on the given PORT is guaranteed not to
1191 * hang" than have CHAR-READY? itself hang. Alternately, djb's SIGALARM
1192 * hack could be used. */
1193 if (p[0] & PORT_SOCKET_BIT && fill_input_port(p, 1) == -1) return SC_FALSE;
1194 return SC_TRUE;
1195}
1196/* Barbarous relic from writing the lexer based on stdio/ungetc */
1197#define EOF (-1)
1198static void put_back_char(int c) {
1199 value *p;
1200 assert(is_port(R_PORT));
1201 p = heap+untag(R_PORT);
1202 if (c == EOF) p[0] |= PORT_EOF_BIT;
1203 else {
1204 value start = untag(p[PORT_START]);
1205 assert(start);
1206 --start;
1207 string_buf(p[PORT_BUF])[start] = c;
1208 p[PORT_START] = fixnum(start);
1209 }
1210}
1211static void write_char(uchar c) {
1212 value *p, fill, len;
1213 uchar *buf;
1214 assert(is_port(R_PORT));
1215 p = heap+untag(R_PORT);
1216 fill = untag(p[PORT_FILL]);
1217 len = string_len(p[PORT_BUF]);
1218 assert(fill < len);
1219 buf = string_buf(p[PORT_BUF]);
1220 buf[fill] = c;
1221 ++fill;
1222 p[PORT_FILL] = fixnum(fill);
1223 if (fill == len) flush_output_port(p);
1224}
1225
1226static int stdout_ready;
1227static void flush_all(void) {
1228 /* TODO */
1229 if (stdout_ready) flush_if_needed(stdout_port);
1230}
1231
1232static void write_cstr(const char *s) { for (; *s; ++s) write_char(*s); }
1233static void write_str(value s) { /* also for symbols */
1234 value len = string_len(s);
1235 uchar *buf = string_buf(s);
1236 assert(is_string(s) || is_symbol(s));
1237 for (; len; --len, ++buf) write_char(*buf);
1238}
1239static void write_str_quoted(value s) {
1240 value i, len = string_len(s);
1241 uchar *buf = string_buf(s);
1242 write_char('"');
1243 for (i = 0; i < len; i++) {
1244 uchar c = buf[i];
1245 if (c == '"' || c == '\\') write_char('\\');
1246 write_char(c);
1247 }
1248 write_char('"');
1249}
1250static void newline(void) { write_char('\n'); }
1251
1252/* Environments
1253 *
1254 * An environment is a list of lexical frames followed by global frames.
1255 *
1256 * A lexical frame is a vector of which the first element is the list of
1257 * symbols naming the variables (possibly improper, as in a lambda expression),
1258 * and the remaining elements are the corresponding values.
1259 *
1260 * A global frame is a list of (symbol . value) binding pairs. */
1261
1262static value r5rs_env, gscm_env, interaction_env, toplevel_env;
1263
1264static void check_mutable_env(value env, value name) {
1265 if (env != interaction_env) {
1266 assert(env == r5rs_env || env == gscm_env || env == toplevel_env);
1267 sc_error1("variable in immutable environment:", name);
1268 }
1269}
1270
1271/* Construct a new lexical frame for the application of the procedure in R_PROC
1272 * to the freshly allocated argument list in R_ARGS (no other side effects) */
1273static value make_lex_frame(void) {
1274 value k, frame, args, arity, fixed_arity;
1275 long encoded_arity = proc_arity(R_PROC);
1276 if (encoded_arity < 0) {
1277 arity = (value)(-encoded_arity);
1278 fixed_arity = arity - 1;
1279 }
1280 else {
1281 arity = (value)encoded_arity;
1282 fixed_arity = arity;
1283 }
1284 frame = make_vector_uninit(1 + arity);
1285 vector_set(frame, 0, proc_params(R_PROC));
1286 args = R_ARGS;
1287 for (k = 1; k <= fixed_arity; k++) {
1288 if (args == SC_NULL) sc_error("too few arguments");
1289 vector_set(frame, k, car(args));
1290 args = cdr(args);
1291 }
1292 if (fixed_arity < arity) vector_set(frame, k, args);
1293 else if (args != SC_NULL) sc_error("too many arguments");
1294 return frame;
1295}
1296
1297/* Construct a new lexical frame for a LETREC binding list in r2, that is, bind
1298 * the given names to not-yet-defined values. The name list is constructed in
1299 * reverse order.
1300 * Side effects: r2 R_CAR R_CDR */
1301static value make_letrec_frame(void) {
1302 /* TODO optimize: transpose the binding list? */
1303 value k, len, frame;
1304 R_CDR = SC_NULL;
1305 len = 1;
1306 for (; r2 != SC_NULL; r2 = cdr(r2)) {
1307 len++;
1308 R_CAR = car(car(r2));
1309 R_CDR = cons();
1310 }
1311 frame = make_vector_uninit(len);
1312 vector_set(frame, 0, R_CDR);
1313 for (k = 1; k < len; k++)
1314 vector_set(frame, k, UNDEFINED);
1315 return frame;
1316}
1317
1318/* Add a new binding for R_CAR to R_CDR to the topmost frame of global R_ENV.
1319 * Side effects: R_CAR R_CDR */
1320static void extend_global_env(void) {
1321 R_CAR = cons(); /* new binding */
1322 R_CDR = car(R_ENV); /* top frame */
1323 assert(is_pair(R_CDR) || R_CDR == SC_NULL);
1324 R_CDR = cons();
1325 set_car(R_ENV, R_CDR);
1326}
1327
1328/* Construct a new global frame containing copies of the bindings in the frame
1329 * in R_EXPR. Side effects: R_CAR R_CDR R_EXPR R_TAIL R_RESULT */
1330static value copy_global_frame(void) {
1331 value temp;
1332 R_CAR = R_CDR = SC_NULL;
1333 R_TAIL = R_RESULT = cons();
1334 for (; R_EXPR != SC_NULL; R_EXPR = cdr(R_EXPR)) {
1335 temp = car(R_EXPR);
1336 R_CAR = car(temp); R_CDR = cdr(temp);
1337 R_CAR = cons(); /* copied binding */
1338 R_CDR = SC_NULL;
1339 temp = cons();
1340 set_cdr(R_TAIL, temp);
1341 R_TAIL = temp;
1342 }
1343 return cdr(R_RESULT);
1344}
1345
1346static value global_frame_lookup(value name, value frame) {
1347 value binding;
1348 for (; frame != SC_NULL; frame = cdr(frame)) {
1349 binding = car(frame);
1350 if (car(binding) == name) return binding;
1351 }
1352 return SC_FALSE;
1353}
1354
1355static value lex_frame_lookup(value name, value frame) {
1356 value names, index;
1357 index = 1;
1358 for (names = vector_ref(frame, 0); is_pair(names); names = cdr(names)) {
1359 if (car(names) == name) goto found;
1360 index++;
1361 }
1362 if (names != name) return 0;
1363found:
1364 if (vector_ref(frame, 1) == UNDEFINED) /* see LETREC */
1365 sc_error1("undefined variable:", name);
1366 return index;
1367}
1368
1369static value env_lookup(value name, value env) {
1370 value frame, binding, index;
1371 assert(is_symbol(name));
1372 for (; env != SC_NULL; env = cdr(env)) {
1373 frame = car(env);
1374 if (is_vector(frame)) {
1375 index = lex_frame_lookup(name, frame);
1376 if (index) return vector_ref(frame, index);
1377 }
1378 else {
1379 binding = global_frame_lookup(name, frame);
1380 if (binding != SC_FALSE) return cdr(binding);
1381 }
1382 }
1383 sc_error1("unbound variable:", name);
1384}
1385
1386static void env_lookup_set(value name, value env, value new) {
1387 value frame, binding, index;
1388 assert(is_symbol(name));
1389 for (; env != SC_NULL; env = cdr(env)) {
1390 frame = car(env);
1391 if (is_vector(frame)) {
1392 index = lex_frame_lookup(name, frame);
1393 if (index) {
1394 vector_set(frame, index, new);
1395 return;
1396 }
1397 }
1398 else {
1399 binding = global_frame_lookup(name, frame);
1400 if (binding != SC_FALSE) {
1401 check_mutable_env(env, name);
1402 set_cdr(binding, new);
1403 return;
1404 }
1405 }
1406 }
1407 sc_error1("unbound variable:", name);
1408}
1409
1410/* Variable references: created by compiler to memoize environment lookups */
1411
1412static int is_variable_ref(value v) { return is_ext_type(v, T_VARIABLE_REF); }
1413
1414/* Return an unresolved variable reference for a symbol in R_CAR */
1415static value make_variable_ref() {
1416 assert(is_symbol(R_CAR));
1417 value ref = sc_malloc(2);
1418 heap[ref] = ext_add_tag(0, T_VARIABLE_REF);
1419 heap[ref+1] = R_CAR;
1420 return add_tag(ref, T_EXTENDED);
1421}
1422
1423/* Look up an unresolved variable reference and memoize */
1424static void resolve_variable_ref(value ref, value env, int mutable) {
1425 value ptr, name, frame, height, binding, index;
1426 ptr = untag(ref);
1427 name = heap[ptr+1];
1428 assert(is_symbol(name));
1429 height = 0;
1430 for (; env != SC_NULL; env = cdr(env)) {
1431 frame = car(env);
1432 if (is_vector(frame)) {
1433 index = lex_frame_lookup(name, frame);
1434 if (index) {
1435 if (height > FIXNUM_MAX)
1436 /* maybe possible on small architectures */
1437 sc_error("environment too deep");
1438 heap[ptr] = ext_add_tag(index, T_VARIABLE_REF);
1439 heap[ptr+1] = add_tag(height, T_FIXNUM);
1440 return;
1441 }
1442 }
1443 else {
1444 binding = global_frame_lookup(name, frame);
1445 if (binding != SC_FALSE) {
1446 if (mutable) check_mutable_env(env, name);
1447 heap[ptr+1] = binding;
1448 return;
1449 }
1450 }
1451 height++;
1452 }
1453 sc_error1("unbound variable:", name);
1454}
1455
1456static value variable_ref_get(value ref, value env) {
1457 value ptr, contents, height;
1458 ptr = untag(ref);
1459retry:
1460 contents = heap[ptr+1];
1461 if (is_pair(contents)) /* global */
1462 return cdr(contents);
1463 else if (is_fixnum(contents)) { /* lexical */
1464 for (height = fixnum_val(contents); height; height--)
1465 env = cdr(env);
1466 return vector_ref(car(env), ext_untag(heap[ptr]));
1467 }
1468 else { /* unresolved */
1469 resolve_variable_ref(ref, env, 0);
1470 goto retry;
1471 }
1472}
1473
1474static void variable_ref_set(value ref, value env, value new) {
1475 value ptr, contents, height;
1476 ptr = untag(ref);
1477retry:
1478 contents = heap[ptr+1];
1479 if (is_pair(contents)) /* global */
1480 set_cdr(contents, new);
1481 else if (is_fixnum(contents)) { /* lexical */
1482 for (height = fixnum_val(contents); height; height--)
1483 env = cdr(env);
1484 vector_set(car(env), ext_untag(heap[ptr]), new);
1485 }
1486 else { /* unresolved */
1487 resolve_variable_ref(ref, env, 1);
1488 goto retry;
1489 }
1490}
1491
1492
1493/***********
1494 * Debugging
1495 */
1496
1497static void shallow_print(void);
1498
1499void sc_dump(value v) {
1500 r_dump = v;
1501 PUSH(R_CAR);
1502 PUSH(R_EXPR);
1503 PUSH(R_PORT);
1504 R_EXPR = r_dump;
1505 R_PORT = stdout_port;
1506 shallow_print();
1507 newline();
1508 R_PORT = pop();
1509 R_EXPR = pop();
1510 R_CAR = pop();
1511 r_dump = SC_NULL;
1512}
1513
1514
1515/****************
1516 * Core evaluator
1517 *
1518 * The evaluator is a set of subroutines delimited by labels, with "switch"
1519 * cases serving as pushable return addresses. (Caution is needed in case of
1520 * nested switches or "break".) Properly tail recursive calls are where "goto"
1521 * is used rather than CALL, that is, a new return address is not pushed.
1522 * Nothing else may be left on the subroutine's stack frame in these cases!
1523 */
1524
1525/* Shorthand for non-tail subroutine calls. Beware of the register side effects
1526 * or confusing RETURN with C return. */
1527#define CALL(subroutine_label, return_address) \
1528 { R_CAR = return_address; push(); goto subroutine_label; }
1529#define RETURN(val) { R_RESULT = (val); goto dispatch; }
1530
1531/* Return addresses */
1532#define EV_DONE 0
1533#define EV_COMPILE_RESULT 1
1534#define EV_CALL_OPERATOR 2
1535#define EV_CALL_LOOP 3
1536#define EV_UNWIND_LOOP 4
1537#define EV_REWIND_LOOP 5
1538#define EV_SEQ_LOOP 6
1539#define EV_IF_PREDICATE 7
1540#define EV_SET_RESULT 8
1541#define EV_LETREC_LOOP 9
1542#define EV_DEFINE_RESULT 10
1543#define EV_FORCE_RESULT 11
1544#define EV_CALL_WITH_VALUES 12
1545
1546static const char *err_context;
1547static jmp_buf err_longjmp_env;
1548
1549/* Takes expression in R_EXPR and environment in R_ENV */
1550static void evaluator(void) {
1551 value k;
1552 if (setjmp(err_longjmp_env)) goto APPLY;
1553 if (r_compiler) CALL(COMPILE, EV_DONE);
1554 CALL(EVAL, EV_DONE);
1555dispatch:
1556 switch (pop()) {
1557 case EV_DONE:
1558 assert(r_stack == SC_NULL);
1559 r_error_cont = SC_NULL;
1560 break;
1561
1562COMPILE:
1563 /* Compile expression R_EXPR then evaluate in environment R_ENV */
1564 PUSH(R_ENV);
1565 R_CAR = R_EXPR;
1566 R_CDR = SC_NULL;
1567 R_ARGS = cons();
1568 R_PROC = r_compiler;
1569 CALL(APPLY, EV_COMPILE_RESULT);
1570 case EV_COMPILE_RESULT:
1571 R_EXPR = R_RESULT;
1572 R_ENV = pop();
1573 goto EVAL;
1574
1575EVAL:
1576 /* Evaluate expression R_EXPR in environment R_ENV */
1577 err_context = "eval";
1578 if (is_pair(R_EXPR)) { /* Combination */
1579 R_OPERANDS = cdr(R_EXPR);
1580 R_EXPR = car(R_EXPR);
1581 if (is_symbol(R_EXPR)) {
1582 if (R_EXPR == s_lambda) RETURN(procedure());
1583 if (R_EXPR == s_if) goto IF;
1584 if (R_EXPR == s_set) goto SET;
1585 if (R_EXPR == s_begin) goto EVAL_BODY;
1586 if (R_EXPR == s_letrec) goto LETREC;
1587 if (R_EXPR == s_quote) RETURN(car(R_OPERANDS));
1588 if (R_EXPR == s_define) goto DEFINE;
1589 if (R_EXPR == s_delay) goto DELAY;
1590 }
1591 goto EVAL_CALL;
1592 }
1593 if (is_variable_ref(R_EXPR)) /* Cacheable variable reference */
1594 RETURN(variable_ref_get(R_EXPR, R_ENV));
1595 if (is_symbol(R_EXPR))
1596 /* Slow and stupid variable lookup: replacing symbols in the
1597 * expression tree with variable references is done by the
1598 * compiler, so this is needed to bootstrap */
1599 RETURN(env_lookup(R_EXPR, R_ENV));
1600 assert(is_number(R_EXPR) ||
1601 is_boolean(R_EXPR) ||
1602 is_character(R_EXPR) ||
1603 is_string(R_EXPR) ||
1604 /* not valid Scheme, but allowed in compiler output */
1605 R_EXPR == SC_NULL ||
1606 is_vector(R_EXPR));
1607 RETURN(R_EXPR); /* Self-evaluating */
1608
1609EVAL_CALL:
1610 /* Procedure call (operator operand ...)
1611 * Evaluate operator in R_EXPR and each operand in R_OPERANDS, build
1612 * argument list and apply in R_ENV. */
1613 PUSH(R_OPERANDS);
1614 PUSH(R_ENV);
1615 CALL(EVAL, EV_CALL_OPERATOR);
1616 case EV_CALL_OPERATOR:
1617 R_ENV = pop();
1618 R_CAR = R_RESULT;
1619 R_OPERANDS = pop();
1620 push(); /* evaluated operator */
1621 R_CAR = R_CDR = SC_NULL;
1622 R_TAIL = cons(); /* arg list tail pointer */
1623 PUSH(R_TAIL); /* arg list head pointer */
1624 PUSH(R_ENV);
1625 for (; R_OPERANDS != SC_NULL; R_OPERANDS = cdr(R_OPERANDS)) {
1626 PUSH(R_OPERANDS);
1627 PUSH(R_TAIL);
1628 R_EXPR = car(R_OPERANDS);
1629 CALL(EVAL, EV_CALL_LOOP);
1630 case EV_CALL_LOOP:
1631 R_CAR = R_RESULT;
1632 R_TAIL = pop();
1633 R_OPERANDS = pop();
1634 R_ENV = peek();
1635 R_CDR = SC_NULL;
1636 R_CDR = cons();
1637 set_cdr(R_TAIL, R_CDR);
1638 R_TAIL = R_CDR;
1639 }
1640 drop(); /* environment */
1641 R_ARGS = cdr(pop()); /* arg list head pointer */
1642 R_PROC = pop(); /* evaluated operator */
1643 goto APPLY;
1644
1645APPLY:
1646 /* Extend the lexical environment of procedure R_PROC by binding its
1647 * formal parameters to arguments in the freshly allocated list R_ARGS,
1648 * then evaluate its body in the new environment. */
1649 if (is_builtin(R_PROC)) {
1650 err_context = builtin_name(R_PROC);
1651 r_flag = f_none;
1652 R_RESULT = (builtin_func(R_PROC))(R_ARGS);
1653 /* Builtins cannot call back into the evaluator as that would break
1654 * tail recursion and enable unlimited recursion on the C stack.
1655 * Instead they can set a flag to signal a tail call to a given
1656 * subroutine. */
1657 switch (r_flag) {
1658 case f_none: RETURN(R_RESULT);
1659 case f_compile: goto COMPILE;
1660 case f_apply: goto APPLY;
1661 case f_force: goto FORCE;
1662 case f_call_with_values: goto CALL_WITH_VALUES;
1663 /* optimization, see RETURN_VALUES */
1664 case f_values: goto VALUES;
1665 }
1666 }
1667 err_context = "apply";
1668 if (is_compound_proc(R_PROC)) {
1669 R_OPERANDS = proc_body(R_PROC);
1670 R_CAR = make_lex_frame();
1671 R_CDR = proc_env(R_PROC);
1672 R_ENV = cons();
1673 goto EVAL_BODY;
1674 }
1675 if (is_continuation(R_PROC)) goto APPLY_CONTINUATION;
1676 sc_error1("not a procedure:", R_PROC);
1677
1678APPLY_CONTINUATION:
1679 /* Return the value(s) R_ARGS to the continuation R_PROC, restoring its
1680 * stack and applying any thunks registered to exit the current dynamic
1681 * extent and re-enter the captured one. */
1682 R_WIND_TO = continuation_spool(R_PROC);
1683 if (r_spool != R_WIND_TO) {
1684 R_LCA = lowest_common_ancestor(r_spool, R_WIND_TO);
1685 r_stack = SC_NULL;
1686 PUSH(R_ARGS);
1687 PUSH(R_PROC);
1688 /* Unwind: apply "after" thunks from the current extent up to (but
1689 * not including) the narrowest common extent */
1690 while (r_spool != R_LCA) {
1691 assert(r_spool != SC_NULL);
1692 /* XXX ^ possible to violate if thunk escapes? */
1693 R_PROC = cdr(car(r_spool));
1694 r_spool = cdr(r_spool);
1695 R_ARGS = SC_NULL;
1696 PUSH(R_LCA);
1697 CALL(APPLY, EV_UNWIND_LOOP);
1698 case EV_UNWIND_LOOP:
1699 R_LCA = pop();
1700 }
1701 /* Rewind: apply "before" thunks down to the captured extent
1702 * starting below the common extent */
1703 R_WIND_TO = continuation_spool(peek());
1704 for (r_spool = R_WIND_TO; r_spool != R_LCA; r_spool = cdr(r_spool))
1705 PUSH(r_spool);
1706 while (r_spool != R_WIND_TO) {
1707 R_PROC = car(car(peek()));
1708 R_ARGS = SC_NULL;
1709 PUSH(R_WIND_TO);
1710 CALL(APPLY, EV_REWIND_LOOP);
1711 case EV_REWIND_LOOP:
1712 R_WIND_TO = pop();
1713 r_spool = pop();
1714 }
1715 R_PROC = pop();
1716 R_ARGS = pop();
1717 assert(r_stack == SC_NULL);
1718 }
1719 r_stack = continuation_stack(R_PROC);
1720 VALUES:
1721 if (peek() == EV_CALL_WITH_VALUES) {
1722 drop();
1723 goto CALL_WITH_VALUES_CONT;
1724 }
1725 if (R_ARGS == SC_NULL) sc_error("no value for ordinary continuation");
1726 if (cdr(R_ARGS) != SC_NULL)
1727 sc_error1("multiple values for ordinary continuation:", R_ARGS);
1728 RETURN(car(R_ARGS));
1729
1730EVAL_BODY:
1731 /* Evaluate one or more commands/expressions. (No definitions; we don't
1732 * need to distinguish sequence from body, as internal definitions are
1733 * converted to letrec by the compiler.)
1734 * Paramters: R_OPERANDS R_ENV */
1735 PUSH(R_ENV);
1736 assert(R_OPERANDS != SC_NULL);
1737 for (; cdr(R_OPERANDS) != SC_NULL; R_OPERANDS = cdr(R_OPERANDS)) {
1738 R_EXPR = car(R_OPERANDS);
1739 PUSH(R_OPERANDS);
1740 CALL(EVAL, EV_SEQ_LOOP);
1741 case EV_SEQ_LOOP:
1742 R_OPERANDS = pop();
1743 R_ENV = peek();
1744 }
1745 drop(); /* environment */
1746 R_EXPR = car(R_OPERANDS);
1747 goto EVAL;
1748
1749IF:
1750 /* (if predicate consequent [alternate])
1751 * Parameters: R_OPERANDS R_ENV */
1752 R_EXPR = car(R_OPERANDS); /* predicate */
1753 R_OPERANDS = cdr(R_OPERANDS);
1754 R_CAR = car(R_OPERANDS); /* consequent */
1755 R_OPERANDS = cdr(R_OPERANDS);
1756 push(); /* consequent */
1757 PUSH(R_OPERANDS); /* (alternate) */
1758 PUSH(R_ENV);
1759 CALL(EVAL, EV_IF_PREDICATE);
1760 case EV_IF_PREDICATE:
1761 R_ENV = pop();
1762 if (R_RESULT != SC_FALSE) {
1763 drop(); /* (alternate) */
1764 R_EXPR = pop(); /* consequent */
1765 goto EVAL;
1766 }
1767 R_EXPR = pop(); /* (alternate) */
1768 drop(); /* consequent */
1769 if (R_EXPR != SC_NULL) {
1770 R_EXPR = car(R_EXPR); /* alternate */
1771 goto EVAL;
1772 }
1773 RETURN(SC_NULL);
1774
1775SET:
1776 /* (set! variable value)
1777 * Parameters: R_OPERANDS R_ENV */
1778 err_context = "set!";
1779 R_CAR = car(R_OPERANDS); /* variable name/ref */
1780 R_EXPR = cadr(R_OPERANDS); /* value expression */
1781 push();
1782 PUSH(R_ENV);
1783 CALL(EVAL, EV_SET_RESULT);
1784 case EV_SET_RESULT:
1785 R_ENV = pop();
1786 R_CAR = pop(); /* variable name/ref */
1787 if (is_variable_ref(R_CAR))
1788 variable_ref_set(R_CAR, R_ENV, R_RESULT);
1789 else /* Slow and stupid lookup for bootstrap, as in EVAL */
1790 env_lookup_set(R_CAR, R_ENV, R_RESULT);
1791 RETURN(SC_NULL);
1792
1793LETREC:
1794 /* (letrec ((var init) ...) body)
1795 * Parameters: R_OPERANDS R_ENV */
1796 r2 = R_ARGS = car(R_OPERANDS); /* binding specifiers */
1797 PUSH(cdr(R_OPERANDS)); /* body */
1798 R_CAR = make_letrec_frame(); /* new frame */
1799 k = vector_len(R_CAR);
1800 R_CDR = R_ENV;
1801 R_ENV = cons(); /* new environment */
1802 /* Evaluate initializers in the new environment */
1803 PUSH(R_ENV);
1804 for (; R_ARGS != SC_NULL; R_ARGS = cdr(R_ARGS)) {
1805 k--;
1806 PUSH(k);
1807 PUSH(R_ARGS);
1808 R_EXPR = car(cdr(car(R_ARGS)));
1809 CALL(EVAL, EV_LETREC_LOOP);
1810 case EV_LETREC_LOOP:
1811 R_ARGS = pop();
1812 k = pop();
1813 R_ENV = peek();
1814 vector_set(car(R_ENV), k, R_RESULT);
1815 /* Trick: all variables in a frame are considered UNDEFINED if the
1816 * first one is. (Checking this is cheap due to memoized variable
1817 * refs.) Since we're filling in the frame backwards, to match the
1818 * reversed name list from make_letrec_frame, we catch uses of
1819 * undefined variables in the initializers without needing to store
1820 * their results in a temporary list here and then copy. */
1821 }
1822 drop();
1823 assert(k == 1);
1824 /* Evaluate body in the now populated environment */
1825 R_OPERANDS = pop(); /* body */
1826 goto EVAL_BODY;
1827
1828DEFINE:
1829 /* (define variable value)
1830 * Paramters: R_OPERANDS R_ENV */
1831 if (R_ENV != interaction_env) {
1832 err_context = "define";
1833 sc_error("not allowed in this environment");
1834 }
1835 PUSH(car(R_OPERANDS)); /* variable name */
1836 R_EXPR = car(cdr(R_OPERANDS)); /* value expression */
1837 CALL(EVAL, EV_DEFINE_RESULT);
1838 case EV_DEFINE_RESULT:
1839 /* XXX is this supposed to not handle variable refs? */
1840 R_ENV = interaction_env;
1841 R_CAR = pop(); /* variable name */
1842 R_EXPR = global_frame_lookup(R_CAR, car(R_ENV));
1843 if (R_EXPR == SC_FALSE) {
1844 R_CDR = R_RESULT;
1845 extend_global_env();
1846 }
1847 else set_cdr(R_EXPR, R_RESULT);
1848 RETURN(SC_NULL);
1849
1850DELAY:
1851 /* (delay expr)
1852 * Parameters: R_OPERANDS R_ENV */
1853 R_EXPR = car(R_OPERANDS);
1854 RETURN(promise());
1855
1856FORCE:
1857 /* Parameters: R_EXPR: promise */
1858 if (!is_promise(R_EXPR)) sc_error1("not a promise:", R_EXPR);
1859 if (promise_done(R_EXPR)) RETURN(promise_value(R_EXPR));
1860 PUSH(R_EXPR);
1861 R_ENV = promise_env(R_EXPR);
1862 R_EXPR = promise_value(R_EXPR);
1863 CALL(EVAL, EV_FORCE_RESULT);
1864 case EV_FORCE_RESULT:
1865 R_EXPR = pop();
1866 /* If promise forces itself recursively, keep the first result */
1867 if (promise_done(R_EXPR)) RETURN(promise_value(R_EXPR));
1868 promise_memoize(R_EXPR, R_RESULT);
1869 RETURN(R_RESULT);
1870
1871CALL_WITH_VALUES:
1872 /* Parameters: R_PROC: producer, R_ARGS: consumer */
1873 PUSH(R_ARGS);
1874 R_ARGS = SC_NULL;
1875 CALL(APPLY, EV_CALL_WITH_VALUES);
1876 case EV_CALL_WITH_VALUES:
1877 /* Producer returned a single value normally */
1878 R_CAR = R_RESULT;
1879 R_CDR = SC_NULL;
1880 R_ARGS = cons();
1881 CALL_WITH_VALUES_CONT:
1882 /* Producer returned by calling a continuation */
1883 R_PROC = pop();
1884 goto APPLY;
1885
1886 }
1887}
1888
1889/* Internal error signaller: similar in form to an evaluator subroutine, but
1890 * callable from downstack C functions. */
1891__attribute__((noreturn))
1892void sc_error1(const char *msg, value detail) {
1893 static int in_handler = 0;
1894 const char *sep = ": ";
1895 if (r_error_cont != SC_NULL) {
1896 /* Hook installed by toplevel. As it's a captured continuation,
1897 * unwinding from where the error occurred happens in the usual way. */
1898 R_PROC = r_error_cont;
1899 /* Mirroring toplevel, fall back to the default if an error is
1900 * recursively raised in the handler (or the allocations here). If a
1901 * handler is restored using SET-ERROR-HANDLER!, r_error_cont is
1902 * restored alongside. */
1903 r_error_cont = SC_NULL;
1904 R_CDR = SC_NULL;
1905 if (detail != UNDEFINED) {
1906 R_CAR = detail;
1907 R_CDR = cons();
1908 }
1909 if (err_context) {
1910 value cl = strlen(err_context), sl = strlen(sep), ml = strlen(msg);
1911 uchar *buf = string_buf(R_CAR = make_string_uninit(cl + sl + ml));
1912 memcpy(buf, err_context, cl); buf += cl;
1913 memcpy(buf, sep, sl); buf += sl;
1914 memcpy(buf, msg, ml);
1915 }
1916 else R_CAR = string(msg);
1917 R_CAR = cons();
1918 R_CDR = SC_NULL;
1919 R_ARGS = cons();
1920 longjmp(err_longjmp_env, 1);
1921 }
1922 else if (stdout_port && !in_handler) {
1923 /* Default handler: print and halt */
1924 in_handler = 1; /* fall back to fatal if this too raises an error */
1925 R_PORT = stdout_port;
1926 write_cstr("ERROR [fallback]: ");
1927 if (err_context) {
1928 write_cstr(err_context);
1929 write_cstr(sep);
1930 }
1931 write_cstr(msg);
1932 if (detail != UNDEFINED) {
1933 write_char(' ');
1934 R_EXPR = detail;
1935 shallow_print();
1936 }
1937 newline();
1938 sc_exit(1);
1939 }
1940 else fatal(msg); /* Not initialized, or loop */
1941}
1942
1943
1944/*****************
1945 * Lexical scanner
1946 */
1947
1948/* Initial buffer allocation for token types that need it */
1949#define DEFAULT_LEXBUF_SIZE 32
1950
1951static value lexeme_length;
1952static void lexbuf_init(void) {
1953 lexeme_length = 0;
1954 R_LEXEME = make_string_uninit(DEFAULT_LEXBUF_SIZE);
1955}
1956static void lexbuf_append(uchar c) {
1957 value buf_length = string_len(R_LEXEME);
1958 if (lexeme_length == buf_length) {
1959 value new_length = buf_length * 2;
1960 if (new_length > EXT_LENGTH_MAX) {
1961 new_length = EXT_LENGTH_MAX;
1962 if (lexeme_length == new_length) sc_error("token too long");
1963 }
1964 value new_buf = make_string_uninit(new_length);
1965 memcpy(string_buf(new_buf), string_buf(R_LEXEME), buf_length);
1966 R_LEXEME = new_buf;
1967 }
1968 string_buf(R_LEXEME)[lexeme_length] = c;
1969 lexeme_length++;
1970}
1971static void lexbuf_done(void) { string_truncate(R_LEXEME, lexeme_length); }
1972
1973static int is_letter(int c) {
1974 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
1975}
1976static int is_digit(int c) { return (c >= '0' && c <= '9'); }
1977static int in_str(int c, const char *s) {
1978 for (; *s; s++) if (*s == c) return 1;
1979 return 0;
1980}
1981static int is_whitespace(int c) { return in_str(c, " \t\n\f\r"); }
1982static int is_delimiter(int c) { return c == EOF || in_str(c, " \t\n\f\r()\";"); }
1983static int is_special_initial(int c) { return in_str(c, "!$%&*/:<=>?^_~"); }
1984static int is_special_subsequent(int c) { return in_str(c, "+-.@"); }
1985
1986typedef enum {
1987 tok_eof,
1988 tok_literal,
1989 tok_open_paren,
1990 tok_close_paren,
1991 tok_dot,
1992 tok_open_vector,
1993 tok_identifier,
1994 tok_named_char,
1995 tok_abbrev,
1996 tok_number,
1997} token_type;
1998
1999typedef enum {
2000 lex_start,
2001 lex_comment,
2002 lex_sharp,
2003 lex_bool,
2004 lex_comma,
2005 lex_dot,
2006 lex_dot2,
2007 lex_dot3,
2008 lex_ident,
2009 lex_string,
2010 lex_string_escape,
2011 lex_char,
2012 lex_char2,
2013 lex_named_char,
2014 lex_plus,
2015 lex_minus,
2016 lex_number,
2017} lexer_state;
2018
2019/* Finite state machine to read a token from R_PORT. Returns the token type and
2020 * sets R_LEXEME to the value, if applicable: the expanded symbol for the
2021 * quoting abbreviations, and a string for identifiers, named characters, and
2022 * numbers. */
2023
2024static token_type read_token(void) {
2025 lexer_state state = lex_start;
2026 uchar saved_char = 0;
2027 R_LEXEME = SC_NULL;
2028#define TRANSITION(s) { state = s; continue; }
2029#define PUT_BACK put_back_char(c)
2030 for (;;) {
2031 int c;
2032 value cv = read_char(R_PORT);
2033 c = (cv == SC_EOF) ? EOF : char_val(cv);
2034
2035 switch (state) {
2036 case lex_start:
2037 switch (c) {
2038 case EOF: return tok_eof;
2039 case '(': return tok_open_paren;
2040 case ')': return tok_close_paren;
2041 case '\'': R_LEXEME = s_quote; return tok_abbrev;
2042 case '`': R_LEXEME = s_quasiquote; return tok_abbrev;
2043 case '#': TRANSITION(lex_sharp);
2044 case ',': TRANSITION(lex_comma);
2045 case '.': TRANSITION(lex_dot);
2046 case ';': TRANSITION(lex_comment);
2047 case '"': lexbuf_init(); TRANSITION(lex_string);
2048 case '+': TRANSITION(lex_plus);
2049 case '-': TRANSITION(lex_minus);
2050 default:
2051 if (is_whitespace(c)) continue;
2052 lexbuf_init();
2053 if (is_letter(c) || is_special_initial(c)) {
2054 lexbuf_append(lc(c)); TRANSITION(lex_ident);
2055 }
2056 if (is_digit(c)) {
2057 lexbuf_append(c); TRANSITION(lex_number);
2058 }
2059 sc_error1("bad character at start of token:", character(c));
2060 }
2061 case lex_comment:
2062 if (c == '\n') TRANSITION(lex_start);
2063 if (c == EOF) return tok_eof;
2064 continue;
2065 case lex_sharp:
2066 switch (lc(c)) {
2067 case '(': return tok_open_vector;
2068 case 't': R_LEXEME = SC_TRUE; TRANSITION(lex_bool);
2069 case 'f': R_LEXEME = SC_FALSE; TRANSITION(lex_bool);
2070 case 'e':
2071 case 'i':
2072 case 'b':
2073 case 'o':
2074 case 'd':
2075 case 'x': lexbuf_init(); lexbuf_append('#'); lexbuf_append(c);
2076 TRANSITION(lex_number);
2077 case '\\': TRANSITION(lex_char);
2078 default: sc_error("bad # sequence");
2079 }
2080 case lex_bool:
2081 PUT_BACK;
2082 if (!is_delimiter(c)) sc_error("bad # sequence");
2083 return tok_literal;
2084 case lex_comma:
2085 if (c == '@') { R_LEXEME = s_unquote_splicing; return tok_abbrev; }
2086 PUT_BACK; R_LEXEME = s_unquote; return tok_abbrev;
2087 case lex_dot:
2088 if (is_delimiter(c)) { PUT_BACK; return tok_dot; }
2089 if (c == '.') TRANSITION(lex_dot2);
2090 lexbuf_init(); lexbuf_append('.'); lexbuf_append(c);
2091 TRANSITION(lex_number);
2092 case lex_dot2:
2093 if (c != '.') { PUT_BACK; sc_error("bad . sequence"); }
2094 TRANSITION(lex_dot3);
2095 case lex_dot3:
2096 PUT_BACK;
2097 if (is_delimiter(c)) {
2098 R_LEXEME = string("..."); return tok_identifier;
2099 }
2100 sc_error("bad . sequence");
2101 case lex_ident:
2102 if (is_letter(c) || is_special_initial(c) ||
2103 is_digit(c) || is_special_subsequent(c)) {
2104 lexbuf_append(lc(c)); continue;
2105 }
2106 PUT_BACK;
2107 if (is_delimiter(c)) { lexbuf_done(); return tok_identifier; }
2108 sc_error("bad identifier");
2109 case lex_string:
2110 switch (c) {
2111 case EOF: sc_error("unexpected end-of-file in string");
2112 case '"': lexbuf_done(); return tok_literal;
2113 case '\\': TRANSITION(lex_string_escape);
2114 default: lexbuf_append(c); continue;
2115 }
2116 case lex_string_escape:
2117 switch (c) {
2118 case EOF: sc_error("unexpected end-of-file in string");
2119 case '"':
2120 case '\\': lexbuf_append(c); TRANSITION(lex_string);
2121 default: sc_error("bad escape in string");
2122 }
2123 case lex_char:
2124 if (c == EOF) sc_error("unexpected end-of-file in character");
2125 saved_char = c; TRANSITION(lex_char2);
2126 case lex_char2:
2127 if (is_delimiter(c)) {
2128 PUT_BACK; R_LEXEME = character(saved_char); return tok_literal;
2129 }
2130 lexbuf_init(); lexbuf_append(lc(saved_char)); lexbuf_append(lc(c));
2131 TRANSITION(lex_named_char);
2132 case lex_named_char:
2133 if (is_delimiter(c)) {
2134 PUT_BACK;
2135 lexbuf_done();
2136 return tok_named_char;
2137 }
2138 lexbuf_append(lc(c)); continue;
2139 case lex_plus:
2140 if (is_delimiter(c)) {
2141 PUT_BACK; R_LEXEME = string("+"); return tok_identifier;
2142 }
2143 lexbuf_init(); lexbuf_append('+'); lexbuf_append(c);
2144 TRANSITION(lex_number);
2145 case lex_minus:
2146 if (is_delimiter(c)) {
2147 PUT_BACK; R_LEXEME = string("-"); return tok_identifier;
2148 }
2149 lexbuf_init(); lexbuf_append('-'); lexbuf_append(c);
2150 TRANSITION(lex_number);
2151 case lex_number:
2152 if (is_delimiter(c)) {
2153 PUT_BACK;
2154 lexbuf_done();
2155 return tok_number;
2156 }
2157 lexbuf_append(c); continue;
2158 }
2159 }
2160}
2161
2162
2163/******************
2164 * Bootstrap reader
2165 */
2166
2167/* Read a value from R_PORT, using a predictive parser for Scheme's LL(1)
2168 * grammar (report section 7.1.2). The sole purpose is to parse the compiler
2169 * and library code at startup (though this started out as the only reader).
2170 * Does not handle named characters or any numeric syntax beyond plain decimal
2171 * fixnums.
2172 *
2173 * O(n) runtime except for symbols, as interning is currently proportional to
2174 * the symbol table size for each one. Implemented as subroutines calling on
2175 * the Scheme stack, much like the evaluator, so there is no overflow hazard or
2176 * nesting depth limit other than available heap space. */
2177
2178/* Minimal base-10 fixnum decoder */
2179static value str_to_fixnum(value s) {
2180 uchar *p = string_buf(s);
2181 value len = string_len(s), neg = 0, acc = 0;
2182 if (!len) goto err;
2183 if (*p == '-') {
2184 neg = 1; --len; ++p;
2185 if (!len) goto err;
2186 }
2187 for (; len; --len, ++p) {
2188 if (!is_digit(*p)) goto err;
2189 if (acc > FIXNUM_MAX/10) goto err;
2190 acc = 10*acc + (*p - '0');
2191 }
2192 if (acc > FIXNUM_MAX) goto err;
2193 return fixnum(neg ? -acc : acc);
2194err:
2195 sc_error("bad number token");
2196}
2197
2198/* Return addresses */
2199#define RD_DONE 0
2200#define RD_LIST_FIRST 1
2201#define RD_LIST_LOOP 2
2202#define RD_LIST_DOT 3
2203#define RD_ABBREV 4
2204#define RD_VEC_LOOP 5
2205
2206static value sc_read(void) {
2207 token_type t;
2208 CALL(datum, RD_DONE);
2209
2210dispatch:
2211 switch (pop()) {
2212 case RD_DONE:
2213 break;
2214
2215datum:
2216 t = read_token();
2217 switch (t) {
2218 case tok_eof: RETURN(SC_EOF);
2219 case tok_literal: RETURN(R_LEXEME);
2220 case tok_open_paren: goto list;
2221 case tok_close_paren: RETURN(RD_CLOSEPAREN);
2222 case tok_dot: RETURN(RD_DOT);
2223 case tok_open_vector: goto vector;
2224 case tok_identifier: R_CAR = R_LEXEME; RETURN(string_to_symbol());
2225 case tok_named_char: sc_error("named characters unsupported");
2226 case tok_abbrev: goto abbrev;
2227 case tok_number: RETURN(str_to_fixnum(R_LEXEME));
2228 }
2229
2230list:
2231 CALL(datum, RD_LIST_FIRST);
2232 case RD_LIST_FIRST:
2233 if (R_RESULT == RD_CLOSEPAREN) RETURN(SC_NULL);
2234 if (R_RESULT == RD_DOT) sc_error("dotted list without first item");
2235 if (R_RESULT == SC_EOF) sc_error("unexpected end-of-file in list");
2236 R_CAR = R_RESULT;
2237 R_CDR = SC_NULL;
2238 R_CAR = cons();
2239 push(); /* list head */
2240 for (;;) {
2241 push(); /* list tail */
2242 CALL(datum, RD_LIST_LOOP);
2243 case RD_LIST_LOOP:
2244 if (R_RESULT == RD_CLOSEPAREN) {
2245 drop(); /* list tail */
2246 RETURN(pop()); /* list head */
2247 }
2248 if (R_RESULT == RD_DOT) {
2249 CALL(datum, RD_LIST_DOT);
2250 case RD_LIST_DOT:
2251 if (R_RESULT == RD_CLOSEPAREN)
2252 sc_error("dotted list without last item");
2253 if (R_RESULT == RD_DOT) sc_error("extra dot in dotted list");
2254 if (R_RESULT == SC_EOF)
2255 sc_error("unexpected end-of-file in list");
2256 PUSH(R_RESULT)
2257 t = read_token();
2258 R_RESULT = pop();
2259 R_CAR = pop(); /* list tail */
2260 if (t == tok_close_paren) {
2261 set_cdr(R_CAR, R_RESULT);
2262 RETURN(pop()); /* list head */
2263 }
2264 if (t == tok_eof) sc_error("unexpected end-of-file in list");
2265 sc_error("excess item in tail of dotted list");
2266 }
2267 if (R_RESULT == SC_EOF) sc_error("unexpected end-of-file in list");
2268 R_CAR = R_RESULT;
2269 R_CDR = SC_NULL;
2270 R_CAR = cons();
2271 R_CDR = pop(); /* list tail */
2272 set_cdr(R_CDR, R_CAR);
2273 }
2274
2275abbrev: /* 'x -> (quote x) etc. */
2276 PUSH(R_LEXEME) /* expanded abbrev symbol */
2277 CALL(datum, RD_ABBREV);
2278 case RD_ABBREV:
2279 if (R_RESULT == RD_CLOSEPAREN)
2280 sc_error("unexpected close-paren in abbreviation");
2281 if (R_RESULT == RD_DOT)
2282 sc_error("unexpected dot in abbreviation");
2283 if (R_RESULT == SC_EOF)
2284 sc_error("unexpected end-of-file in abbreviation");
2285 R_CAR = R_RESULT;
2286 R_CDR = SC_NULL;
2287 R_CDR = cons();
2288 R_CAR = pop(); /* expanded abbrev symbol */
2289 RETURN(cons());
2290
2291vector:
2292 /* First build a list */
2293 R_CAR = SC_NULL;
2294 for (;;) {
2295 push(); /* list head */
2296 CALL(datum, RD_VEC_LOOP);
2297 case RD_VEC_LOOP:
2298 if (R_RESULT == RD_CLOSEPAREN) {
2299 /* Then copy to a new vector while un-reversing */
2300 R_EXPR = pop(); /* list head */
2301 RETURN(rev_list_to_vec());
2302 }
2303 if (R_RESULT == RD_DOT) sc_error("unexpected dot in vector");
2304 if (R_RESULT == SC_EOF)
2305 sc_error("unexpected end-of-file in vector");
2306 R_CAR = R_RESULT;
2307 R_CDR = pop(); /* list head */
2308 R_CAR = cons();
2309 }
2310
2311 }
2312 if (R_RESULT == RD_CLOSEPAREN) sc_error("unexpected close-paren");
2313 if (R_RESULT == RD_DOT) sc_error("unexpected dot");
2314 return R_RESULT;
2315}
2316
2317
2318/*****************
2319 * Number printers
2320 */
2321
2322static char fmt_buf[128]; /* TODO justify size */
2323static const char *fmt_fixnum_dec(long val) {
2324 int i = sizeof(fmt_buf) - 1, neg = 0;
2325 /* TODO null termination is convenient here but perhaps not ideal */
2326 fmt_buf[i] = 0;
2327 if (val < 0) { neg = 1; val = -val; }
2328 do {
2329 --i; assert(i);
2330 fmt_buf[i] = '0' + (val % 10);
2331 val /= 10;
2332 } while (val);
2333 if (neg) fmt_buf[--i] = '-';
2334 return fmt_buf+i;
2335}
2336static const char *fmt_ulong_dec(ulong val) {
2337 int i = sizeof(fmt_buf) - 1;
2338 fmt_buf[i] = 0;
2339 do {
2340 --i; assert(i >= 0);
2341 fmt_buf[i] = '0' + (val % 10);
2342 val /= 10;
2343 } while (val);
2344 return fmt_buf+i;
2345}
2346static const char *fmt_fixnum_hex(long val) {
2347 int i = sizeof(fmt_buf) - 1, neg = 0;
2348 fmt_buf[i] = 0;
2349 if (val < 0) { neg = 1; val = -val; }
2350 do {
2351 --i; assert(i);
2352 fmt_buf[i] = "0123456789abcdef"[val & 0xf];
2353 val >>= 4;
2354 } while (val);
2355 if (neg) fmt_buf[--i] = '-';
2356 return fmt_buf+i;
2357}
2358static const char *fmt_fixnum_oct(long val) {
2359 int i = sizeof(fmt_buf) - 1, neg = 0;
2360 fmt_buf[i] = 0;
2361 if (val < 0) { neg = 1; val = -val; }
2362 do {
2363 --i; assert(i);
2364 fmt_buf[i] = '0' + (val & 7);
2365 val >>= 3;
2366 } while (val);
2367 if (neg) fmt_buf[--i] = '-';
2368 return fmt_buf+i;
2369}
2370static const char *fmt_fixnum_bin(long val) {
2371 int i = sizeof(fmt_buf) - 1, neg = 0;
2372 fmt_buf[i] = 0;
2373 if (val < 0) { neg = 1; val = -val; }
2374 do {
2375 --i; assert(i);
2376 fmt_buf[i] = '0' + (val & 1);
2377 val >>= 1;
2378 } while (val);
2379 if (neg) fmt_buf[--i] = '-';
2380 return fmt_buf+i;
2381}
2382static const char *fmt_ulong_bin(ulong val) {
2383 int i = sizeof(fmt_buf) - 1;
2384 fmt_buf[i] = 0;
2385 do {
2386 --i; assert(i);
2387 fmt_buf[i] = '0' + (val & 1);
2388 val >>= 1;
2389 } while (val);
2390 return fmt_buf+i;
2391}
2392static const char *fmt_flonum_dec(double val) {
2393 /* TODO follow up on R5RS citations 3 and 5 */
2394 if ((size_t)snprintf(fmt_buf, sizeof fmt_buf, "%.15g", val) >=
2395 sizeof fmt_buf)
2396 sc_error("BUG: flonum formatting truncated");
2397 return fmt_buf;
2398}
2399
2400/****************************
2401 * Fallback (shallow) printer
2402 */
2403
2404/* Print the value in R_EXPR to R_PORT, using "write" style (quoting strings
2405 * and characters) but not expanding named characters or looking inside
2406 * compound objects. (This used to be the real printer, implemented as
2407 * recursive subroutines on the Scheme stack like the reader and evaluator, but
2408 * is now just for low-level debug and fallback error handlers.) */
2409
2410static void shallow_print(void) {
2411 int t = tag(R_EXPR);
2412 if (t == T_SPECIAL) {
2413 const char *s;
2414 if (R_EXPR == SC_NULL) s = "()";
2415 else if (R_EXPR == SC_TRUE) s = "#t";
2416 else if (R_EXPR == SC_FALSE) s = "#f";
2417 else if (R_EXPR == SC_EOF) s = "#EOF";
2418 else if (R_EXPR == SC_NULL_ENV) s = "#ENVSPEC:NULL";
2419 else if (R_EXPR == SC_REPORT_ENV) s = "#ENVSPEC:SCHEME-REPORT";
2420 else if (R_EXPR == SC_GSCM_ENV) s = "#ENVSPEC:GALES-SCHEME";
2421 else if (R_EXPR == SC_INTERACT_ENV) s = "#ENVSPEC:INTERACTION";
2422 else if (R_EXPR == SC_TOPLEVEL_ENV) s = "#ENVSPEC:TOPLEVEL";
2423 else if (R_EXPR == UNDEFINED) s = "#UNDEFINED";
2424 else if (R_EXPR == RD_CLOSEPAREN) s = "#RDSENTINEL:CLOSEPAREN";
2425 else if (R_EXPR == RD_DOT) s = "#RDSENTINEL:DOT";
2426 else fatal("BUG: invalid special in shallow_print");
2427 write_cstr(s);
2428 }
2429 else if (t == T_IMMUT_PAIR) write_cstr("#IMMUTABLE-PAIR");
2430 else if (t == T_PAIR) write_cstr("#PAIR");
2431 else if (t == T_CHARACTER) { write_cstr("#\\"); write_char(R_EXPR); }
2432 else if (t == T_FIXNUM) write_cstr(fmt_fixnum_dec(fixnum_val(R_EXPR)));
2433 else if (t == T_EXTENDED) {
2434 t = ext_tag(heap[untag(R_EXPR)]);
2435 if ((t | 1) == T_STRING) write_str_quoted(R_EXPR);
2436 else if ((t | 1) == T_VECTOR) {
2437 if (t == T_VECTOR) write_cstr("#VECTOR:");
2438 else write_cstr("#IMMUTABLE-VECTOR:");
2439 write_cstr(fmt_fixnum_dec(vector_len(R_EXPR)));
2440 }
2441 else if (t == T_SYMBOL) write_str(R_EXPR);
2442 else if (t == T_BUILTIN) {
2443 write_cstr("#BUILTIN:");
2444 write_cstr(builtin_name(R_EXPR));
2445 }
2446 else if (t == T_PROCEDURE) write_cstr("#PROCEDURE");
2447 else if (t == T_CONTINUATION) write_cstr("#CONTINUATION");
2448 else if (t == T_PROMISE) write_cstr("#PROMISE");
2449 else if (t == T_PORT) write_cstr("#PORT");
2450 else if (t == T_FLONUM) write_cstr("#FLONUM");
2451 else if (t == T_BIGNUM) write_cstr("#BIGNUM");
2452 else if (t == T_RATIONAL) write_cstr("#RATIONAL");
2453 else if (t == T_COMPLEX) write_cstr("#COMPLEX");
2454 else if (t == T_VARIABLE_REF) write_cstr("#VARIABLE-REF");
2455 else fatal("BUG: invalid extended tag in shallow_print");
2456 }
2457 else fatal("BUG: invalid tag in shallow_print");
2458}
2459
2460
2461/********************
2462 * Builtin procedures
2463 */
2464
2465/* Argument wrangling helpers for builtins */
2466
2467static void require_args(value args) {
2468 if (args == SC_NULL) sc_error("too few arguments");
2469}
2470
2471static void no_args(value args) {
2472 if (args != SC_NULL) sc_error("too many arguments");
2473}
2474
2475static value extract_arg(value *args) {
2476 require_args(*args);
2477 value arg = car(*args);
2478 *args = cdr(*args);
2479 return arg;
2480}
2481
2482static value final_arg(value args) {
2483 require_args(args);
2484 no_args(cdr(args));
2485 return car(args);
2486}
2487
2488static value require_input_port(value arg) {
2489 if (!is_input_port(arg)) sc_error("not an input port"); return arg;
2490}
2491
2492static value require_output_port(value arg) {
2493 if (!is_output_port(arg)) sc_error("not an output port"); return arg;
2494}
2495
2496static value opt_final_in_port_arg(value args) {
2497 return require_input_port(args == SC_NULL ? r_input_port :
2498 final_arg(args));
2499}
2500
2501static value opt_final_out_port_arg(value args) {
2502 return require_output_port(args == SC_NULL ? r_output_port :
2503 final_arg(args));
2504}
2505
2506static value require_symbol(value arg) {
2507 if (!is_symbol(arg)) sc_error1("not a symbol:", arg);
2508 return arg;
2509}
2510
2511static value require_string(value arg) {
2512 if (!is_string(arg)) sc_error1("not a string:", arg);
2513 return arg;
2514}
2515
2516static value require_mutable_string(value arg) {
2517 if (!is_mutable_string(arg)) {
2518 if (is_string(arg)) sc_error1("immutable string:", arg);
2519 sc_error1("not a string:", arg);
2520 }
2521 return arg;
2522}
2523
2524static value require_stringlike(value arg) {
2525 if (!(is_string(arg) || is_symbol(arg)))
2526 sc_error1("not a string or symbol:", arg);
2527 return arg;
2528}
2529
2530static value require_vector(value arg) {
2531 if (!is_vector(arg)) sc_error1("not a vector:", arg);
2532 return arg;
2533}
2534
2535static value require_mutable_vector(value arg) {
2536 if (!is_mutable_vector(arg)) {
2537 if (is_vector(arg)) sc_error1("immutable vector:", arg);
2538 sc_error1("not a vector:", arg);
2539 }
2540 return arg;
2541}
2542
2543static value require_fixnum(value arg) {
2544 if (!is_fixnum(arg)) sc_error1("not a fixnum:", arg);
2545 return arg;
2546}
2547
2548static value require_procedure(value arg) {
2549 if (!is_procedure(arg)) sc_error1("not a procedure:", arg);
2550 return arg;
2551}
2552
2553#define BUILTIN(name) static value name(value args)
2554
2555/* Mnemonic for multi-valued returns, i.e. passing multiple values to the
2556 * current continuation. f_values is strictly an optimization; we could just as
2557 * well set R_PROC to current_continuation() and r_flag to f_apply.
2558 * The arg list must be newly allocated! */
2559#define RETURN_VALUES(args) { \
2560 R_ARGS = args; \
2561 r_flag = f_values; \
2562 return SC_NULL; \
2563}
2564
2565/* 6.1 Equivalence predicates */
2566
2567BUILTIN(builtin_is_eq) {
2568 value a = extract_arg(&args);
2569 return boolean(a == final_arg(args));
2570}
2571
2572/* 6.2.5 Numerical operations */
2573
2574BUILTIN(builtin_is_number) { return boolean(is_number(final_arg(args))); }
2575BUILTIN(builtin_is_integer) { return boolean(is_integer(final_arg(args))); }
2576BUILTIN(builtin_is_exact) { return boolean(is_exact(final_arg(args))); }
2577BUILTIN(builtin_is_inexact) { return boolean(is_flonum(final_arg(args))); }
2578
2579/* 6.3.1 Booleans */
2580
2581BUILTIN(builtin_not) { return boolean(final_arg(args) == SC_FALSE); }
2582BUILTIN(builtin_is_boolean) { return boolean(is_boolean(final_arg(args))); }
2583
2584/* 6.3.2 Pairs and lists */
2585
2586BUILTIN(builtin_is_pair) { return boolean(is_pair(final_arg(args))); }
2587BUILTIN(builtin_cons) {
2588 R_CAR = extract_arg(&args);
2589 R_CDR = final_arg(args);
2590 return cons();
2591}
2592
2593BUILTIN(builtin_car) { return safe_car(final_arg(args)); }
2594BUILTIN(builtin_cdr) { return safe_cdr(final_arg(args)); }
2595
2596BUILTIN(builtin_caar) { return safe_car(builtin_car(args)); }
2597BUILTIN(builtin_cadr) { return safe_car(builtin_cdr(args)); }
2598BUILTIN(builtin_cdar) { return safe_cdr(builtin_car(args)); }
2599BUILTIN(builtin_cddr) { return safe_cdr(builtin_cdr(args)); }
2600
2601BUILTIN(builtin_caaar) { return safe_car(builtin_caar(args)); }
2602BUILTIN(builtin_caadr) { return safe_car(builtin_cadr(args)); }
2603BUILTIN(builtin_cadar) { return safe_car(builtin_cdar(args)); }
2604BUILTIN(builtin_caddr) { return safe_car(builtin_cddr(args)); }
2605BUILTIN(builtin_cdaar) { return safe_cdr(builtin_caar(args)); }
2606BUILTIN(builtin_cdadr) { return safe_cdr(builtin_cadr(args)); }
2607BUILTIN(builtin_cddar) { return safe_cdr(builtin_cdar(args)); }
2608BUILTIN(builtin_cdddr) { return safe_cdr(builtin_cddr(args)); }
2609
2610BUILTIN(builtin_caaaar) { return safe_car(builtin_caaar(args)); }
2611BUILTIN(builtin_caaadr) { return safe_car(builtin_caadr(args)); }
2612BUILTIN(builtin_caadar) { return safe_car(builtin_cadar(args)); }
2613BUILTIN(builtin_caaddr) { return safe_car(builtin_caddr(args)); }
2614BUILTIN(builtin_cadaar) { return safe_car(builtin_cdaar(args)); }
2615BUILTIN(builtin_cadadr) { return safe_car(builtin_cdadr(args)); }
2616BUILTIN(builtin_caddar) { return safe_car(builtin_cddar(args)); }
2617BUILTIN(builtin_cadddr) { return safe_car(builtin_cdddr(args)); }
2618BUILTIN(builtin_cdaaar) { return safe_cdr(builtin_caaar(args)); }
2619BUILTIN(builtin_cdaadr) { return safe_cdr(builtin_caadr(args)); }
2620BUILTIN(builtin_cdadar) { return safe_cdr(builtin_cadar(args)); }
2621BUILTIN(builtin_cdaddr) { return safe_cdr(builtin_caddr(args)); }
2622BUILTIN(builtin_cddaar) { return safe_cdr(builtin_cdaar(args)); }
2623BUILTIN(builtin_cddadr) { return safe_cdr(builtin_cdadr(args)); }
2624BUILTIN(builtin_cdddar) { return safe_cdr(builtin_cddar(args)); }
2625BUILTIN(builtin_cddddr) { return safe_cdr(builtin_cdddr(args)); }
2626
2627BUILTIN(builtin_set_car) {
2628 value p = extract_arg(&args);
2629 value val = final_arg(args);
2630 if (tag(p) != T_PAIR) {
2631 if (tag(p) == T_IMMUT_PAIR) sc_error("immutable pair");
2632 sc_error("not a pair");
2633 }
2634 set_car(p, val);
2635 return SC_NULL;
2636}
2637BUILTIN(builtin_set_cdr) {
2638 value p = extract_arg(&args);
2639 value val = final_arg(args);
2640 if (tag(p) != T_PAIR) {
2641 if (tag(p) == T_IMMUT_PAIR) sc_error("immutable pair");
2642 sc_error("not a pair");
2643 }
2644 set_cdr(p, val);
2645 return SC_NULL;
2646}
2647
2648BUILTIN(builtin_is_null) { return boolean(final_arg(args) == SC_NULL); }
2649BUILTIN(builtin_is_list) { return boolean(is_list(final_arg(args))); }
2650
2651BUILTIN(builtin_length) {
2652 long len = safe_list_length(final_arg(args));
2653 if (len < 0) sc_error("not a list");
2654 return fixnum(len);
2655}
2656
2657/* 6.3.3 Symbols */
2658
2659BUILTIN(builtin_is_symbol) { return boolean(is_symbol(final_arg(args))); }
2660
2661BUILTIN(builtin_sym_to_str) {
2662 /* TODO use immutability to avoid copying */
2663 R_EXPR = require_symbol(final_arg(args));
2664 return string_copy_immutable();
2665}
2666
2667BUILTIN(builtin_str_to_sym) {
2668 R_CAR = require_string(final_arg(args));
2669 return string_to_symbol();
2670}
2671
2672/* 6.3.4 Characters */
2673
2674BUILTIN(builtin_is_char) { return boolean(is_character(final_arg(args))); }
2675
2676#define CHAR1 uchar a = safe_char_val(final_arg(args));
2677#define CHAR2 uchar a = safe_char_val(extract_arg(&args)); \
2678 uchar b = safe_char_val(final_arg(args));
2679
2680BUILTIN(builtin_char_eq) { CHAR2 return boolean(a == b); }
2681BUILTIN(builtin_char_lt) { CHAR2 return boolean(a < b); }
2682BUILTIN(builtin_char_gt) { CHAR2 return boolean(a > b); }
2683BUILTIN(builtin_char_le) { CHAR2 return boolean(a <= b); }
2684BUILTIN(builtin_char_ge) { CHAR2 return boolean(a >= b); }
2685BUILTIN(builtin_char_ci_eq) { CHAR2 return boolean(lc(a) == lc(b)); }
2686BUILTIN(builtin_char_ci_lt) { CHAR2 return boolean(lc(a) < lc(b)); }
2687BUILTIN(builtin_char_ci_gt) { CHAR2 return boolean(lc(a) > lc(b)); }
2688BUILTIN(builtin_char_ci_le) { CHAR2 return boolean(lc(a) <= lc(b)); }
2689BUILTIN(builtin_char_ci_ge) { CHAR2 return boolean(lc(a) >= lc(b)); }
2690
2691BUILTIN(builtin_char_is_alpha) {
2692 CHAR1 return boolean((a >= 'A' && a <= 'Z') || (a >= 'a' && a <= 'z'));
2693}
2694BUILTIN(builtin_char_is_num) {
2695 CHAR1 return boolean(a >= '0' && a <= '9');
2696}
2697BUILTIN(builtin_char_is_white) { CHAR1 return boolean(is_whitespace(a)); }
2698BUILTIN(builtin_char_is_upper) { CHAR1 return boolean(a >= 'A' && a <= 'Z'); }
2699BUILTIN(builtin_char_is_lower) { CHAR1 return boolean(a >= 'a' && a <= 'z'); }
2700
2701BUILTIN(builtin_char_to_int) { CHAR1 return fixnum(a); }
2702
2703BUILTIN(builtin_int_to_char) {
2704 long n = safe_fixnum_val(final_arg(args));
2705 if (n < 0 || n > 255) sc_error1("out of bounds:", fixnum(n));
2706 return character(n);
2707}
2708
2709BUILTIN(builtin_char_upcase) { CHAR1 return character(uc(a)); }
2710BUILTIN(builtin_char_downcase) { CHAR1 return character(lc(a)); }
2711
2712/* 6.3.5 Strings */
2713
2714BUILTIN(builtin_is_str) { return boolean(is_string(final_arg(args))); }
2715
2716BUILTIN(builtin_make_str) {
2717 long len = safe_fixnum_val(extract_arg(&args));
2718 uchar fill = (args == SC_NULL) ? ' ' : safe_char_val(final_arg(args));
2719 return make_string(len, fill);
2720}
2721
2722BUILTIN(builtin_str_length) {
2723 return fixnum(string_len(require_string(final_arg(args))));
2724}
2725
2726BUILTIN(builtin_str_ref) {
2727 value s = require_string(extract_arg(&args));
2728 value k = final_arg(args);
2729 value k_unsigned = safe_fixnum_val(k);
2730 /* see builtin_vec_ref comments */
2731 if (k_unsigned >= string_len(s)) sc_error1("out of bounds:", k);
2732 return character(string_buf(s)[k_unsigned]);
2733}
2734
2735BUILTIN(builtin_str_set) {
2736 value s = require_mutable_string(extract_arg(&args));
2737 value k = extract_arg(&args);
2738 uchar new_char = safe_char_val(final_arg(args));
2739 value k_unsigned = safe_fixnum_val(k);
2740 /* see builtin_vec_ref comments */
2741 if (k_unsigned >= string_len(s)) sc_error1("out of bounds:", k);
2742 string_buf(s)[k_unsigned] = new_char;
2743 return SC_NULL;
2744}
2745
2746#define STR2 value a = require_string(extract_arg(&args)); \
2747 value b = require_string(final_arg(args)); \
2748 size_t a_len = string_len(a), b_len = string_len(b); \
2749 uchar *a_buf = string_buf(a), *b_buf = string_buf(b);
2750
2751BUILTIN(builtin_str_eq) {
2752 STR2
2753 if (a_len != b_len) return SC_FALSE;
2754 return boolean(memcmp(a_buf, b_buf, a_len) == 0);
2755}
2756
2757#define STRCMP \
2758 STR2 int cmp = memcmp(a_buf, b_buf, (a_len < b_len) ? a_len : b_len);
2759
2760BUILTIN(builtin_str_lt) {
2761 STRCMP return boolean(cmp < 0 || (cmp == 0 && a_len < b_len));
2762}
2763BUILTIN(builtin_str_gt) {
2764 STRCMP return boolean(cmp > 0 || (cmp == 0 && a_len > b_len));
2765}
2766BUILTIN(builtin_str_le) {
2767 STRCMP return boolean(cmp < 0 || (cmp == 0 && a_len <= b_len));
2768}
2769BUILTIN(builtin_str_ge) {
2770 STRCMP return boolean(cmp > 0 || (cmp == 0 && a_len >= b_len));
2771}
2772
2773static int memcmp_ci(const void *s1, const void *s2, size_t n) {
2774 const uchar *b1 = s1, *b2 = s2;
2775 uchar c1, c2;
2776 size_t i;
2777 for (i = 0; i < n; i++) {
2778 c1 = lc(b1[i]);
2779 c2 = lc(b2[i]);
2780 if (c1 < c2) return -1;
2781 if (c1 > c2) return 1;
2782 }
2783 return 0;
2784}
2785
2786BUILTIN(builtin_str_ci_eq) {
2787 STR2
2788 if (a_len != b_len) return SC_FALSE;
2789 return boolean(memcmp_ci(a_buf, b_buf, a_len) == 0);
2790}
2791
2792#define STRCMP_CI STR2 \
2793 int cmp = memcmp_ci(a_buf, b_buf, (a_len < b_len) ? a_len : b_len);
2794
2795BUILTIN(builtin_str_ci_lt) {
2796 STRCMP_CI return boolean(cmp < 0 || (cmp == 0 && a_len < b_len));
2797}
2798BUILTIN(builtin_str_ci_gt) {
2799 STRCMP_CI return boolean(cmp > 0 || (cmp == 0 && a_len > b_len));
2800}
2801BUILTIN(builtin_str_ci_le) {
2802 STRCMP_CI return boolean(cmp < 0 || (cmp == 0 && a_len <= b_len));
2803}
2804BUILTIN(builtin_str_ci_ge) {
2805 STRCMP_CI return boolean(cmp > 0 || (cmp == 0 && a_len >= b_len));
2806}
2807
2808BUILTIN(builtin_substr) {
2809 value len = string_len(R_EXPR = require_string(extract_arg(&args))),
2810 start = extract_arg(&args), end = final_arg(args),
2811 start_unsigned = safe_fixnum_val(start),
2812 end_unsigned = safe_fixnum_val(end);
2813 if (start_unsigned > len) sc_error1("start out of bounds:", start);
2814 if (end_unsigned > len) sc_error1("end out of bounds:", end);
2815 if (end_unsigned < start_unsigned) sc_error("end less than start");
2816 len = end_unsigned - start_unsigned;
2817 R_RESULT = make_string_uninit(len);
2818 memcpy(string_buf(R_RESULT), string_buf(R_EXPR)+start_unsigned, len);
2819 return R_RESULT;
2820}
2821
2822BUILTIN(builtin_str_append) {
2823 value p, s, len = 0;
2824 uchar *buf;
2825 R_ARGS = args;
2826 for (p = R_ARGS; p != SC_NULL; p = cdr(p)) {
2827 len += string_len(require_string(car(p)));
2828 if (len > EXT_LENGTH_MAX) sc_error("length too large for string");
2829 }
2830 R_RESULT = make_string_uninit(len);
2831 buf = string_buf(R_RESULT);
2832 for (p = R_ARGS; p != SC_NULL; p = cdr(p)) {
2833 s = car(p);
2834 len = string_len(s);
2835 memcpy(buf, string_buf(s), len);
2836 buf += len;
2837 }
2838 return R_RESULT;
2839}
2840
2841BUILTIN(builtin_list_to_str) {
2842 long len, i;
2843 value s;
2844 uchar *buf;
2845 R_ARGS = final_arg(args);
2846 len = safe_list_length(R_ARGS);
2847 if (len < 0) sc_error("not a list");
2848 s = make_string_uninit(len);
2849 buf = string_buf(s);
2850 for (i = 0; i < len; i++) {
2851 buf[i] = safe_char_val(car(R_ARGS));
2852 R_ARGS = cdr(R_ARGS);
2853 }
2854 return s;
2855}
2856
2857BUILTIN(builtin_str_copy) {
2858 R_EXPR = require_string(final_arg(args));
2859 return string_copy();
2860}
2861
2862BUILTIN(builtin_str_fill) {
2863 value s = require_mutable_string(extract_arg(&args));
2864 memset(string_buf(s), safe_char_val(final_arg(args)), string_len(s));
2865 return SC_NULL;
2866}
2867
2868/* 6.3.6 Vectors */
2869
2870BUILTIN(builtin_is_vector) { return boolean(is_vector(final_arg(args))); }
2871
2872BUILTIN(builtin_make_vector) {
2873 long len = safe_fixnum_val(extract_arg(&args));
2874 R_EXPR = (args == SC_NULL) ? SC_NULL : final_arg(args);
2875 return make_vector(len);
2876}
2877
2878BUILTIN(builtin_vec_length) {
2879 value vec = require_vector(final_arg(args));
2880 return fixnum(vector_len(vec));
2881}
2882
2883BUILTIN(builtin_vec_ref) {
2884 value vec = require_vector(extract_arg(&args));
2885 value k = final_arg(args);
2886 value k_unsigned = safe_fixnum_val(k);
2887 if (k_unsigned >= vector_len(vec)) sc_error1("out of bounds:", k);
2888 /* We don't need to also check for negative k: as value is an unsigned
2889 * type, the assignment from long causes a negative to be seen as a
2890 * positive greater than the longest allowed vector length.
2891 * XXX: are there weird machines where this isn't true? */
2892 return vector_ref(vec, k_unsigned);
2893}
2894
2895BUILTIN(builtin_vec_set) {
2896 value vec = require_mutable_vector(extract_arg(&args));
2897 value k = extract_arg(&args);
2898 value obj = final_arg(args);
2899 value k_unsigned = safe_fixnum_val(k);
2900 if (k_unsigned >= vector_len(vec)) sc_error1("out of bounds:", k);
2901 vector_set(vec, k_unsigned, obj);
2902 return SC_NULL;
2903}
2904
2905BUILTIN(builtin_list_to_vec) {
2906 long len;
2907 value vec, *p;
2908 R_ARGS = final_arg(args);
2909 len = safe_list_length(R_ARGS);
2910 if (len < 0) sc_error("not a list");
2911 vec = make_vector_uninit(len);
2912 p = heap + untag(vec) + 1;
2913 for (; len; --len, ++p, R_ARGS = cdr(R_ARGS)) *p = car(R_ARGS);
2914 return vec;
2915}
2916
2917BUILTIN(builtin_vec_fill) {
2918 value vec = require_mutable_vector(extract_arg(&args));
2919 value fill = final_arg(args);
2920 value len = vector_len(vec), i;
2921 for (i = 0; i < len; i++) vector_set(vec, i, fill);
2922 return SC_NULL;
2923}
2924
2925/* 6.4 Control features */
2926
2927BUILTIN(builtin_is_procedure) { return boolean(is_procedure(final_arg(args))); }
2928
2929BUILTIN(builtin_force) {
2930 R_EXPR = final_arg(args);
2931 r_flag = f_force;
2932 return SC_NULL;
2933}
2934
2935BUILTIN(builtin_call_cc) {
2936 R_PROC = require_procedure(final_arg(args));
2937 R_CAR = current_continuation();
2938 R_CDR = SC_NULL;
2939 R_ARGS = cons();
2940 r_flag = f_apply;
2941 return SC_NULL;
2942}
2943
2944BUILTIN(builtin_values) RETURN_VALUES(args)
2945
2946BUILTIN(builtin_call_with_values) {
2947 R_PROC = extract_arg(&args);
2948 R_ARGS = final_arg(args);
2949 r_flag = f_call_with_values;
2950 return SC_NULL;
2951}
2952
2953/* 6.5 Eval */
2954
2955BUILTIN(builtin_eval) {
2956 R_EXPR = extract_arg(&args);
2957 value e = final_arg(args);
2958 switch (e) {
2959 case SC_NULL_ENV:
2960 R_ENV = SC_NULL; break;
2961 case SC_REPORT_ENV:
2962 R_ENV = r5rs_env; break;
2963 case SC_GSCM_ENV:
2964 R_ENV = gscm_env; break;
2965 case SC_INTERACT_ENV:
2966 R_ENV = interaction_env; break;
2967 case SC_TOPLEVEL_ENV:
2968 R_ENV = toplevel_env; break;
2969 default:
2970 sc_error1("not an environment specifier:", e);
2971 }
2972 r_flag = f_compile;
2973 return SC_NULL;
2974}
2975
2976BUILTIN(builtin_report_env) {
2977 if (safe_fixnum_val(final_arg(args)) != 5)
2978 sc_error("unsupported version");
2979 return SC_REPORT_ENV;
2980}
2981BUILTIN(builtin_null_env) {
2982 if (safe_fixnum_val(final_arg(args)) != 5)
2983 sc_error("unsupported version");
2984 return SC_NULL_ENV;
2985}
2986BUILTIN(builtin_interaction_env) {
2987 no_args(args);
2988 return SC_INTERACT_ENV;
2989}
2990
2991/* 6.6.1 Ports */
2992
2993BUILTIN(builtin_is_port) {
2994 return boolean(is_port(final_arg(args)));
2995}
2996BUILTIN(builtin_is_in_port) {
2997 return boolean(is_input_port(final_arg(args)));
2998}
2999BUILTIN(builtin_is_out_port) {
3000 return boolean(is_output_port(final_arg(args)));
3001}
3002
3003BUILTIN(builtin_current_in_port) { no_args(args); return r_input_port; }
3004BUILTIN(builtin_current_out_port) { no_args(args); return r_output_port; }
3005
3006BUILTIN(builtin_open_in_file) {
3007 int fd;
3008 R_EXPR = require_string(final_arg(args));
3009 fd = open_cloexec(c_string_buf(string_append_null()), O_RDONLY);
3010 if (fd == -1) sc_perror1(R_EXPR);
3011 return make_port(fd, 0, DEFAULT_R_BUF);
3012}
3013
3014BUILTIN(builtin_open_out_file) {
3015 int fd, flags = O_WRONLY | O_CREAT;
3016 value if_exists;
3017 R_EXPR = require_string(extract_arg(&args));
3018 if (args == SC_NULL) if_exists = s_truncate;
3019 else if_exists = final_arg(args);
3020
3021 if (if_exists == s_truncate) flags |= O_TRUNC;
3022 else if (if_exists == s_overwrite) ;
3023 else if (if_exists == s_append) flags |= O_APPEND;
3024 else sc_error("invalid if-exists option");
3025
3026 fd = open_cloexec(c_string_buf(string_append_null()), flags);
3027 if (fd == -1) sc_perror1(R_EXPR);
3028 return make_port(fd, 1, DEFAULT_W_BUF);
3029}
3030
3031BUILTIN(builtin_close_in_port) {
3032 close_port(require_input_port(final_arg(args)));
3033 return SC_NULL;
3034}
3035
3036BUILTIN(builtin_close_out_port) {
3037 close_port(require_output_port(final_arg(args)));
3038 return SC_NULL;
3039}
3040
3041/* 6.6.2 Input */
3042
3043BUILTIN(builtin_read_char) { return read_char(opt_final_in_port_arg(args)); }
3044
3045BUILTIN(builtin_peek_char) { return peek_char(opt_final_in_port_arg(args)); }
3046
3047BUILTIN(builtin_is_eof) { return boolean(final_arg(args) == SC_EOF); }
3048
3049BUILTIN(builtin_is_char_ready) {
3050 return input_port_ready(opt_final_in_port_arg(args));
3051}
3052
3053/* 6.6.3 Output */
3054
3055BUILTIN(builtin_write_char) {
3056 uchar c = safe_char_val(extract_arg(&args));
3057 R_PORT = opt_final_out_port_arg(args);
3058 write_char(c);
3059 return SC_NULL;
3060}
3061
3062/* Gales Scheme extensions */
3063
3064BUILTIN(builtin_gscm_env) { no_args(args); return SC_GSCM_ENV; }
3065
3066BUILTIN(builtin_is_immutable) { return boolean(!is_mutable(final_arg(args))); }
3067
3068BUILTIN(builtin_cons_immutable) {
3069 R_CAR = extract_arg(&args);
3070 R_CDR = final_arg(args);
3071 return cons_immutable();
3072}
3073
3074BUILTIN(builtin_str_copy_immutable) {
3075 R_EXPR = require_string(final_arg(args));
3076 return string_copy_immutable();
3077}
3078
3079BUILTIN(builtin_vec_copy_immutable) {
3080 value len;
3081 R_EXPR = require_vector(final_arg(args));
3082 len = vector_len(R_EXPR);
3083 R_RESULT = make_immutable_vector(len);
3084 memcpy(heap+untag(R_RESULT)+1, heap+untag(R_EXPR)+1, len*sizeof(value));
3085 return R_RESULT;
3086}
3087
3088BUILTIN(builtin_flush_out_port) {
3089 value port = require_output_port(args == SC_NULL ? r_output_port :
3090 extract_arg(&args)), *p = heap+untag(port);
3091 int fd = fixnum_val(p[PORT_FD]);
3092 if (fd == -1) sc_error("output port closed");
3093 flush_if_needed(port);
3094 if (args != SC_NULL) {
3095 value opt = final_arg(args);
3096 if (opt == s_sync) { if (fsync(fd)) goto sync_err; }
3097 else if (opt == s_data_sync) { if (fdatasync(fd)) goto sync_err; }
3098 else sc_error1("invalid option:", opt);
3099 }
3100 return SC_NULL;
3101sync_err:
3102 if (errno == EINVAL) sc_error("synchronization not possible");
3103 else {
3104 /* As in flush_output_port: no good way to recover from output errors,
3105 * but the kernel won't necessarily continue returning errors, so close
3106 * the port. In practice, the mistake of retrying a failed fsync has
3107 * caused data loss in PostgreSQL (broken durability guarantee). */
3108 int saved = errno; set_port_closed(p); errno = saved;
3109 sc_perror();
3110 }
3111}
3112
3113BUILTIN(builtin_gc) {
3114 no_args(args);
3115 sc_gc();
3116 return fixnum(free_ptr);
3117}
3118
3119BUILTIN(builtin_is_fixnum) { return boolean(is_fixnum(final_arg(args))); }
3120
3121BUILTIN(builtin_fx_eq) {
3122 value a = require_fixnum(extract_arg(&args));
3123 return boolean(a == require_fixnum(final_arg(args)));
3124}
3125
3126BUILTIN(builtin_fx_lt) {
3127 long a = safe_fixnum_val(extract_arg(&args));
3128 return boolean(a < safe_fixnum_val(final_arg(args)));
3129}
3130
3131BUILTIN(builtin_fx_le) {
3132 long a = safe_fixnum_val(extract_arg(&args));
3133 return boolean(a <= safe_fixnum_val(final_arg(args)));
3134}
3135
3136BUILTIN(builtin_fx_lt_unsigned) {
3137 value a = require_fixnum(extract_arg(&args));
3138 return boolean(a < require_fixnum(final_arg(args)));
3139}
3140
3141BUILTIN(builtin_fx_le_unsigned) {
3142 value a = require_fixnum(extract_arg(&args));
3143 return boolean(a <= require_fixnum(final_arg(args)));
3144}
3145
3146/* inputs left tagged: valid for wrapping and bitwise ops */
3147#define FXFOLD(op, init) { \
3148 ulong acc = init; \
3149 for (; args != SC_NULL; args = cdr(args)) \
3150 acc = acc op require_fixnum(car(args)); \
3151 return fixnum(acc); \
3152}
3153
3154BUILTIN(builtin_fx_add_wrap) FXFOLD(+, 0)
3155
3156BUILTIN(builtin_fx_add_carry) {
3157 long acc = untag_signed(require_fixnum(extract_arg(&args)));
3158 acc += untag_signed(require_fixnum(extract_arg(&args)));
3159 if (args != SC_NULL) acc += untag_signed(require_fixnum(final_arg(args)));
3160 R_CDR = SC_NULL; R_CAR = fixnum(acc >> VAL_BITS); /* high word (carry) */
3161 R_CDR = cons(); R_CAR = fixnum(acc); /* low word */
3162 RETURN_VALUES(cons());
3163}
3164
3165BUILTIN(builtin_fx_add_carry_unsigned) {
3166 ulong acc = untag(require_fixnum(extract_arg(&args)));
3167 acc += untag(require_fixnum(extract_arg(&args)));
3168 if (args != SC_NULL) acc += untag(require_fixnum(final_arg(args)));
3169 R_CDR = SC_NULL; R_CAR = fixnum(acc >> VAL_BITS); /* high word (carry) */
3170 R_CDR = cons(); R_CAR = fixnum(acc); /* low word */
3171 RETURN_VALUES(cons());
3172}
3173
3174BUILTIN(builtin_fx_sub_wrap) {
3175 ulong acc = require_fixnum(extract_arg(&args));
3176 if (args == SC_NULL) return fixnum(-acc);
3177 do {
3178 acc -= require_fixnum(car(args));
3179 args = cdr(args);
3180 } while (args != SC_NULL);
3181 return fixnum(acc);
3182}
3183
3184BUILTIN(builtin_fx_sub_borrow_unsigned) {
3185 ulong acc = untag(require_fixnum(extract_arg(&args)));
3186 acc -= untag(require_fixnum(extract_arg(&args)));
3187 if (args != SC_NULL) acc -= untag(require_fixnum(final_arg(args)));
3188 R_CDR = SC_NULL; R_CAR = fixnum(-(((long)acc) >> VAL_BITS));
3189 R_CDR = cons(); R_CAR = fixnum(acc);
3190 RETURN_VALUES(cons());
3191}
3192
3193BUILTIN(builtin_fx_mul_wrap) FXFOLD(*, 1)
3194
3195BUILTIN(builtin_fx_mul_carry) {
3196 ulong a = untag_signed(require_fixnum(extract_arg(&args)));
3197 ulong b = untag_signed(require_fixnum(final_arg(args)));
3198 sc_wide_mul_signed(&a, &b);
3199 R_CDR = SC_NULL; R_CAR = fixnum(b << TAG_BITS | tag(a)); /* high word */
3200 R_CDR = cons(); R_CAR = fixnum(a); /* low word */
3201 RETURN_VALUES(cons());
3202}
3203
3204BUILTIN(builtin_fx_mul_carry_unsigned) {
3205 ulong a = untag(require_fixnum(extract_arg(&args)));
3206 ulong b = untag(require_fixnum(final_arg(args)));
3207 sc_wide_mul(&a, &b);
3208 R_CDR = SC_NULL; R_CAR = fixnum(b << TAG_BITS | tag(a)); /* high word */
3209 R_CDR = cons(); R_CAR = fixnum(a); /* low word */
3210 RETURN_VALUES(cons());
3211}
3212
3213BUILTIN(builtin_fxnot) {
3214 return fixnum(~require_fixnum(final_arg(args)));
3215}
3216
3217BUILTIN(builtin_fxand) FXFOLD(&, -1)
3218BUILTIN(builtin_fxior) FXFOLD(|, 0)
3219BUILTIN(builtin_fxxor) FXFOLD(^, 0)
3220
3221BUILTIN(builtin_fxif) {
3222 ulong mask = require_fixnum(extract_arg(&args));
3223 ulong a = require_fixnum(extract_arg(&args));
3224 ulong b = require_fixnum(final_arg(args));
3225 return fixnum(b ^ (mask & (a ^ b)));
3226 /* equivalent to (mask & a) | (~mask & b) */
3227}
3228
3229BUILTIN(builtin_fxmaj) {
3230 ulong a = require_fixnum(extract_arg(&args));
3231 ulong b = require_fixnum(extract_arg(&args));
3232 ulong c = require_fixnum(final_arg(args));
3233 return fixnum((a & (b | c)) | (b & c));
3234 /* equivalent to (a & b) | (a & c) | (b & c) */
3235}
3236
3237BUILTIN(builtin_fxshift) {
3238 long a = untag_signed(require_fixnum(extract_arg(&args)));
3239 long bits = untag_signed(require_fixnum(final_arg(args)));
3240 if (bits < 0) {
3241 if (bits <= -VAL_BITS) bits = -VAL_BITS+1;
3242 a >>= -bits;
3243 }
3244 else {
3245 if (bits >= VAL_BITS) a = 0;
3246 else a <<= bits;
3247 }
3248 return fixnum(a);
3249}
3250
3251BUILTIN(builtin_fxshift_unsigned) {
3252 ulong a = require_fixnum(extract_arg(&args));
3253 long bits = untag_signed(require_fixnum(final_arg(args)));
3254 if (bits < 0) {
3255 if (bits <= -VAL_BITS) a = 0;
3256 else a = untag(a) >> -bits;
3257 }
3258 else {
3259 if (bits >= VAL_BITS) a = 0;
3260 else a <<= bits;
3261 }
3262 return fixnum(a);
3263}
3264
3265BUILTIN(builtin_fxlength_unsigned) {
3266 /* TODO check existing interface alternatives */
3267 return fixnum(sc_bit_length(untag(require_fixnum(final_arg(args)))));
3268}
3269
3270/** (open-subprocess PROGRAM . ARGS) -> (values PID IN-PORT OUT-PORT)
3271 *
3272 * Executes PROGRAM in a Unix subprocess with the given arguments, returning
3273 * its process ID along with input and output ports piped to its standard
3274 * output and input streams respectively. Does not redirect standard error. By
3275 * convention, the first ARG should be the executable filename.
3276 *
3277 * This is intended to be fast and hygienic: it does not invoke the system
3278 * shell, perform a PATH search, pass through environment variables, or leak
3279 * file descriptors associated with ports previously opened in Scheme.
3280 *
3281 * Signals an error if a system-defined limit is reached, per fork(2) (or any
3282 * argument is not a string).
3283 *
3284 * The type of the returned PID is not specified, but must be composed of
3285 * standard types with unambiguous external representation.
3286 *
3287 * See also: wait-subprocess */
3288BUILTIN(builtin_open_subprocess) {
3289 value n_args = 0, i;
3290 char *path, **argv, *envp[] = {NULL};
3291 pid_t pid;
3292 int out_pipe[2], in_pipe[2];
3293
3294 require_args(args);
3295 r1 = args;
3296 /* begin allocation: null-terminated strings and argv */
3297 for (r2 = r1; r2 != SC_NULL; r2 = cdr(r2)) {
3298 R_EXPR = require_string(car(r2));
3299 R_EXPR = string_append_null();
3300 set_car(r2, R_EXPR);
3301 n_args++;
3302 }
3303 n_args--; /* program path not counted as argument */
3304 /* Caution: allocating C blob on the Scheme heap. Must not be reachable
3305 * from the roots, which in turn excludes further allocation while it's in
3306 * use. */
3307 argv = (void*)&heap[sc_malloc(n_args+1)];
3308 /* end allocation */
3309 path = c_string_buf(car(r1));
3310 r1 = cdr(r1); /* program args */
3311 for (i = 0; i < n_args; i++) {
3312 argv[i] = c_string_buf(car(r1));
3313 r1 = cdr(r1);
3314 }
3315 argv[i] = NULL;
3316
3317 if (pipe_cloexec(out_pipe)) goto err1;
3318 if (pipe_cloexec(in_pipe)) goto err2;
3319 /* Use vfork so child creation can be fast, and possible on non-overcommit
3320 * systems, even when parent is large. Any signal handlers must not corrupt
3321 * the parent if invoked in the child. See http://ewontfix.com/7/. */
3322 if ((pid = vfork()) == -1) goto err3;
3323 if (!pid) { /* child */
3324 while (dup2(out_pipe[0], 0) == -1) if (errno != EINTR) _exit(errno);
3325 while (dup2(in_pipe[1], 1) == -1) if (errno != EINTR) _exit(errno);
3326 execve(path, argv, envp);
3327 _exit(errno);
3328 }
3329 blind_close(out_pipe[0]);
3330 blind_close(in_pipe[1]);
3331
3332 /* resume allocation */
3333 R_CDR = SC_NULL; R_CAR = make_port(out_pipe[1], 1, DEFAULT_W_BUF);
3334 R_CDR = cons(); R_CAR = make_port(in_pipe[0], 0, DEFAULT_R_BUF);
3335 R_CDR = cons(); R_CAR = string(fmt_ulong_dec(pid));
3336 /* ^ pid_t can't be guaranteed to fit in a fixnum, so stringify. I can't
3337 * quite decipher POSIX here but it seems safe to assume it fits in a long
3338 * and is positive on success. */
3339 RETURN_VALUES(cons());
3340
3341err3:
3342 blind_close(in_pipe[0]);
3343 blind_close(in_pipe[1]);
3344err2:
3345 blind_close(out_pipe[0]);
3346 blind_close(out_pipe[1]);
3347err1:
3348 sc_perror();
3349}
3350
3351/** (wait-subprocess [PID]) -> STATUS
3352 *
3353 * Blocks until a subprocess has terminated, releases the associated resources,
3354 * and returns either the nonnegative integer exit status for normal exit or
3355 * the negative signal number for termination by signal.
3356 *
3357 * PID identifies the process to wait for; it must compare "equal?" to a PID
3358 * previously returned by open-subprocess for which status has not yet been
3359 * retrieved. If omitted, any subprocess is waited for. */
3360BUILTIN(builtin_wait_subprocess) {
3361 int status;
3362 pid_t pid;
3363 if (args == SC_NULL) pid = -1;
3364 else {
3365 /* dedicated parser for stringified PIDs (see above), yuck */
3366 value s = require_string(final_arg(args));
3367 value len = string_len(s), i;
3368 const uchar *b = string_buf(s);
3369 ulong acc = 0;
3370 if (!len) goto invalid;
3371 for (i = 0; i < len; i++) {
3372 uchar digit = b[i] - '0';
3373 if (digit > 9) goto invalid;
3374 if (acc > ULONG_MAX/10) goto invalid;
3375 acc *= 10;
3376 if (acc + digit < acc) goto invalid;
3377 acc += digit;
3378 }
3379 pid = acc;
3380 if ((ulong)pid != acc || pid < 0) goto invalid;
3381 goto start;
3382invalid:
3383 sc_error1("invalid PID:", s);
3384 }
3385start:
3386 if (waitpid(pid, &status, 0) == -1) {
3387 if (errno == EINTR) goto start;
3388 sc_perror();
3389 }
3390 if (WIFEXITED(status)) return fixnum(WEXITSTATUS(status));
3391 if (WIFSIGNALED(status)) return fixnum(-WTERMSIG(status));
3392 sc_error("unknown status type"); /* shouldn't happen */
3393}
3394
3395BUILTIN(builtin_read_token) {
3396 R_PORT = opt_final_in_port_arg(args);
3397 switch (read_token()) {
3398 case tok_eof: return SC_EOF;
3399 case tok_literal: R_CAR = s_literal; break;
3400 case tok_open_paren: R_CAR = s_open_paren; break;
3401 case tok_close_paren: R_CAR = s_close_paren; break;
3402 case tok_dot: R_CAR = s_dot; break;
3403 case tok_open_vector: R_CAR = s_open_vector; break;
3404 case tok_identifier: R_CAR = s_identifier; break;
3405 case tok_named_char: R_CAR = s_named_char; break;
3406 case tok_abbrev: R_CAR = s_abbrev; break;
3407 case tok_number: R_CAR = s_number; break;
3408 }
3409 R_CDR = R_LEXEME;
3410 return cons();
3411}
3412
3413BUILTIN(builtin_write_string) {
3414 value s = extract_arg(&args);
3415 R_PORT = opt_final_out_port_arg(args);
3416 write_str(require_stringlike(s));
3417 return SC_NULL;
3418}
3419
3420BUILTIN(builtin_write_string_quoted) {
3421 value s = extract_arg(&args);
3422 R_PORT = opt_final_out_port_arg(args);
3423 write_str_quoted(require_stringlike(s));
3424 return SC_NULL;
3425}
3426
3427/* Private builtins exposed to the toplevel and compiler only */
3428
3429#define assert_args(n) (assert(list_length(args) == (n)))
3430
3431/* Debug access to the privileged environment */
3432BUILTIN(builtin_toplevel_env) { no_args(args); return SC_TOPLEVEL_ENV; }
3433
3434/* (define-r5rs symbol obj)
3435 *
3436 * Binds a variable in the otherwise immutable (scheme-report-environment 5)
3437 * as well as the interaction environment. */
3438BUILTIN(builtin_define_r5rs) {
3439 R_CAR = R_VARNAME = require_symbol(extract_arg(&args));
3440 R_CDR = R_EXPR = final_arg(args);
3441 assert(global_frame_lookup(R_CAR, car(r5rs_env)) == SC_FALSE);
3442 assert(global_frame_lookup(R_CAR, car(interaction_env)) == SC_FALSE);
3443 R_ENV = r5rs_env;
3444 extend_global_env();
3445
3446 R_CAR = R_VARNAME;
3447 R_CDR = R_EXPR;
3448 R_ENV = interaction_env;
3449 extend_global_env();
3450 return SC_NULL;
3451}
3452
3453/* (define-gscm symbol obj)
3454 *
3455 * Binds a variable in the otherwise immutable (gales-scheme-environment) as
3456 * well as the interaction environment. */
3457BUILTIN(builtin_define_gscm) {
3458 value binding;
3459 R_VARNAME = require_symbol(extract_arg(&args));
3460 R_EXPR = final_arg(args);
3461 assert(global_frame_lookup(R_VARNAME, car(r5rs_env)) == SC_FALSE);
3462
3463 /* need to be able to upgrade ERROR on startup */
3464 binding = global_frame_lookup(R_VARNAME, car(gscm_env));
3465 if (binding == SC_FALSE) {
3466 R_CAR = R_VARNAME;
3467 R_CDR = R_EXPR;
3468 R_ENV = gscm_env;
3469 extend_global_env();
3470 }
3471 else set_cdr(binding, R_EXPR);
3472
3473 binding = global_frame_lookup(R_VARNAME, car(interaction_env));
3474 if (binding == SC_FALSE) {
3475 R_CAR = R_VARNAME;
3476 R_CDR = R_EXPR;
3477 R_ENV = interaction_env;
3478 extend_global_env();
3479 }
3480 else set_cdr(binding, R_EXPR);
3481 return SC_NULL;
3482}
3483
3484BUILTIN(builtin_set_in_port) {
3485 r_input_port = require_input_port(final_arg(args));
3486 return SC_NULL;
3487}
3488
3489BUILTIN(builtin_set_out_port) {
3490 r_output_port = require_output_port(final_arg(args));
3491 return SC_NULL;
3492}
3493
3494BUILTIN(builtin_push_winding) {
3495 err_context = "dynamic-wind";
3496 R_CAR = args;
3497 require_procedure(extract_arg(&args));
3498 set_cdr(R_CAR, require_procedure(final_arg(args)));
3499 R_CDR = r_spool;
3500 r_spool = cons();
3501 return SC_NULL;
3502}
3503
3504BUILTIN(builtin_variable_ref) {
3505 R_CAR = car(args);
3506 assert(cdr(args) == SC_NULL);
3507 return make_variable_ref();
3508}
3509
3510BUILTIN(builtin_apply_unchecked) {
3511 assert_args(2);
3512 R_PROC = car(args);
3513 R_ARGS = cadr(args);
3514 r_flag = f_apply;
3515 return SC_NULL;
3516}
3517
3518BUILTIN(builtin_car_unchecked) { assert_args(1); return car(car(args)); }
3519BUILTIN(builtin_cdr_unchecked) { assert_args(1); return cdr(car(args)); }
3520BUILTIN(builtin_set_car_unchecked) {
3521 assert_args(2); set_car(car(args), cadr(args)); return SC_NULL;
3522}
3523BUILTIN(builtin_set_cdr_unchecked) {
3524 assert_args(2); set_cdr(car(args), cadr(args)); return SC_NULL;
3525}
3526
3527BUILTIN(builtin_str_ref_unchecked) {
3528 assert_args(2); return character(
3529 string_buf(car(args))[fixnum_val(cadr(args))]);
3530}
3531BUILTIN(builtin_vec_ref_unchecked) {
3532 assert_args(2); return vector_ref(car(args), fixnum_val(cadr(args)));
3533}
3534
3535BUILTIN(builtin_fx_add_unchecked) {
3536 assert_args(2); return fixnum(unsigned_fixnum_val(car(args)) +
3537 unsigned_fixnum_val(cadr(args)));
3538}
3539BUILTIN(builtin_fx_sub_unchecked) {
3540 assert_args(2); return fixnum(unsigned_fixnum_val(car(args)) -
3541 unsigned_fixnum_val(cadr(args)));
3542}
3543BUILTIN(builtin_fx_eq_unchecked) {
3544 assert_args(2); assert(is_fixnum(car(args)) && is_fixnum(cadr(args)));
3545 return boolean(car(args) == cadr(args));
3546}
3547BUILTIN(builtin_fx_lt_unchecked) {
3548 assert_args(2); return boolean(fixnum_val(car(args)) <
3549 fixnum_val(cadr(args)));
3550}
3551BUILTIN(builtin_fx_le_unchecked) {
3552 assert_args(2); return boolean(fixnum_val(car(args)) <=
3553 fixnum_val(cadr(args)));
3554}
3555BUILTIN(builtin_fx_neg_unchecked) {
3556 assert_args(1); return fixnum(-fixnum_val(car(args)));
3557}
3558BUILTIN(builtin_is_fx_neg_unchecked) {
3559 assert_args(1); return boolean(fixnum_val(car(args)) < 0);
3560}
3561
3562BUILTIN(builtin_fx_div_unsigned_unchecked) {
3563 /* unsigned as / and % are implementation-defined on negatives */
3564 ulong a, b, q;
3565 assert_args(2);
3566 a = unsigned_fixnum_val(car(args));
3567 b = unsigned_fixnum_val(cadr(args));
3568 assert(b != 0);
3569 /* the compiler had better recognize this as one division... */
3570 q = a/b;
3571 a = a%b;
3572 R_CDR = SC_NULL; R_CAR = fixnum(a);
3573 R_CDR = cons(); R_CAR = fixnum(q);
3574 RETURN_VALUES(cons());
3575}
3576
3577BUILTIN(builtin_fx_div_ext_unsigned_unchecked) {
3578 /* unsigned as / and % are implementation-defined on negatives */
3579 ulong a_lo, a_hi, b;
3580 assert_args(3);
3581 a_lo = unsigned_fixnum_val(car(args)); args = cdr(args);
3582 a_hi = unsigned_fixnum_val(car(args)); args = cdr(args);
3583 b = unsigned_fixnum_val(car(args));
3584 assert(b > a_hi); /* so quotient fits in fixnum */
3585 a_lo |= a_hi << VAL_BITS;
3586 a_hi >>= TAG_BITS;
3587 sc_div_extended(&a_lo, &a_hi, b);
3588 R_CDR = SC_NULL; R_CAR = fixnum(a_lo); /* remainder */
3589 R_CDR = cons(); R_CAR = fixnum(a_hi); /* quotient */
3590 RETURN_VALUES(cons());
3591}
3592
3593BUILTIN(builtin_fixnum_to_dec_unchecked) {
3594 assert_args(1); return string(fmt_fixnum_dec(fixnum_val(car(args))));
3595}
3596BUILTIN(builtin_fixnum_to_hex_unchecked) {
3597 assert_args(1); return string(fmt_fixnum_hex(fixnum_val(car(args))));
3598}
3599BUILTIN(builtin_fixnum_to_oct_unchecked) {
3600 assert_args(1); return string(fmt_fixnum_oct(fixnum_val(car(args))));
3601}
3602BUILTIN(builtin_fixnum_to_bin_unchecked) {
3603 assert_args(1); return string(fmt_fixnum_bin(fixnum_val(car(args))));
3604}
3605BUILTIN(builtin_fixnum_to_bin_unsigned_unchecked) {
3606 assert_args(1);
3607 return string(fmt_ulong_bin(unsigned_fixnum_val(car(args))));
3608}
3609BUILTIN(builtin_flonum_to_dec_unchecked) {
3610 assert_args(1); return string(fmt_flonum_dec(flonum_val(car(args))));
3611}
3612
3613/* Minimal error builtin to be replaced on startup, e.g. in case of compile
3614 * errors in the toplevel */
3615BUILTIN(builtin_error) {
3616 value msg = require_string(extract_arg(&args));
3617 R_PORT = stdout_port;
3618 write_cstr("ERROR [startup]: ");
3619 write_str(msg);
3620 if (args != SC_NULL) {
3621 write_char(' ');
3622 R_EXPR = car(args);
3623 shallow_print();
3624 }
3625 newline();
3626 sc_exit(1);
3627}
3628
3629BUILTIN(builtin_set_err_cont) {
3630 value h = final_arg(args);
3631 if (!is_continuation(h)) sc_error("not a continuation");
3632 r_error_cont = h;
3633 return SC_NULL;
3634}
3635
3636BUILTIN(builtin_socket_ports) {
3637 make_socket_ports(safe_fixnum_val(final_arg(args)),
3638 DEFAULT_R_BUF, DEFAULT_W_BUF);
3639 R_CDR = SC_NULL; R_CAR = r1;
3640 R_CDR = cons(); R_CAR = r0;
3641 RETURN_VALUES(cons());
3642}
3643
3644static union {
3645 struct sockaddr sa;
3646 struct sockaddr_in sin;
3647 struct sockaddr_un sun;
3648} sa;
3649
3650static socklen_t sa_len;
3651
3652/* Fill sa/sa_len from a Scheme IPv4 address structure */
3653static void build_sockaddr_in(value addr) {
3654 value ip = require_vector(safe_car(addr)),
3655 port = safe_fixnum_val(safe_car(cdr(addr))), i, byte;
3656 uchar *port_buf = (uchar *)&sa.sin.sin_port,
3657 *addr_buf = (uchar *)&sa.sin.sin_addr;
3658 if (port > 65535) sc_error1("port number out of range:", car(cdr(addr)));
3659 memset(&sa.sin, 0, sizeof sa.sin);
3660 sa.sin.sin_family = AF_INET;
3661 port_buf[0] = port >> 8;
3662 port_buf[1] = port & 0xFF;
3663 if (vector_len(ip) != 4) sc_error("bad address length");
3664 for (i = 0; i < 4; ++i) {
3665 byte = safe_fixnum_val(vector_ref(ip, i));
3666 if (byte > 255)
3667 sc_error1("address byte out of range:", vector_ref(ip, i));
3668 addr_buf[i] = byte;
3669 }
3670 sa_len = sizeof sa.sin;
3671}
3672
3673/* Fill sa/sa_len from a Scheme Unix-domain address structure (string) */
3674static void build_sockaddr_un(value addr) {
3675 value path = require_string(addr), len = string_len(path), i;
3676 uchar *buf = string_buf(path);
3677 if (len > sizeof sa.sun.sun_path) sc_error("oversize pathname");
3678 /* initial NUL allowed for Linux abstract sockets */
3679 if (len && buf[0])
3680 for (i = 1; i < len; i++)
3681 if (!buf[i]) sc_error("NUL byte in pathname");
3682 memset(&sa.sun, 0, sizeof sa.sun);
3683 sa.sun.sun_family = AF_UNIX;
3684 memcpy(&sa.sun.sun_path, string_buf(path), len);
3685 sa_len = offsetof(struct sockaddr_un, sun_path) + len;
3686}
3687
3688/* Construct immutable Scheme address structure from a struct sockaddr_* in
3689 * sa/sa_len. Side effects: R_CAR R_CDR */
3690static value parse_sockaddr(void) {
3691 if (sa.sa.sa_family == AF_INET) {
3692 int i;
3693 uchar *port_buf = (uchar *)&sa.sin.sin_port,
3694 *addr_buf = (uchar *)&sa.sin.sin_addr;
3695 R_CDR = SC_NULL;
3696 R_CAR = fixnum((port_buf[0] << 8) + port_buf[1]);
3697 R_CDR = cons_immutable();
3698 R_CAR = make_immutable_vector(4);
3699 for (i = 0; i < 4; ++i) vector_set(R_CAR, i, fixnum(addr_buf[i]));
3700 return cons_immutable();
3701 }
3702 else if (sa.sa.sa_family == AF_UNIX) {
3703 value path, path_len;
3704 if (sa_len > sizeof sa.sun) sc_error("oversize pathname?!");
3705 /* XXX Linuxism; the data returned for unnamed sockets is unspecified
3706 * in the standards */
3707 if (sa_len == sizeof(sa_family_t)) return SC_FALSE;
3708 /* Possible somewhere? */
3709 if (sa_len <= offsetof(struct sockaddr_un, sun_path)) return SC_FALSE;
3710 path_len = sa_len - offsetof(struct sockaddr_un, sun_path);
3711 /* Some implementations are so rude as to append a trailing NUL and
3712 * include it in the length. But a singular NUL is a valid abstract
3713 * socket name on Linux. */
3714 if (path_len > 1 && sa.sun.sun_path[0] && !sa.sun.sun_path[path_len-1])
3715 --path_len;
3716 path = make_immutable_string(path_len);
3717 memcpy(string_buf(path), sa.sun.sun_path, path_len);
3718 return path;
3719 }
3720 sc_error("unknown address family");
3721}
3722
3723static value unbound_socket(int domain, int type) {
3724 return fixnum(chkp(socket(domain, type, 0)));
3725}
3726
3727static value bound_socket(int domain, int type, int reuse) {
3728 int fd = chkp(socket(domain, type, 0));
3729 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof reuse) ||
3730 bind(fd, &sa.sa, sa_len)) {
3731 blind_close(fd);
3732 sc_perror();
3733 }
3734 return fixnum(fd);
3735}
3736
3737BUILTIN(builtin_inet_stream_sock) {
3738 if (args != SC_NULL) {
3739 build_sockaddr_in(final_arg(args));
3740 return bound_socket(AF_INET, SOCK_STREAM, 0);
3741 }
3742 return unbound_socket(AF_INET, SOCK_STREAM);
3743}
3744
3745BUILTIN(builtin_inet_dgram_sock) {
3746 if (args != SC_NULL) {
3747 build_sockaddr_in(final_arg(args));
3748 return bound_socket(AF_INET, SOCK_DGRAM, 0);
3749 }
3750 return unbound_socket(AF_INET, SOCK_DGRAM);
3751}
3752
3753BUILTIN(builtin_unix_stream_sock) {
3754 if (args != SC_NULL) {
3755 build_sockaddr_un(final_arg(args));
3756 return bound_socket(AF_UNIX, SOCK_STREAM, 0);
3757 }
3758 return unbound_socket(AF_UNIX, SOCK_STREAM);
3759}
3760
3761BUILTIN(builtin_unix_dgram_sock) {
3762 if (args != SC_NULL) {
3763 build_sockaddr_un(final_arg(args));
3764 return bound_socket(AF_UNIX, SOCK_DGRAM, 0);
3765 }
3766 return unbound_socket(AF_UNIX, SOCK_DGRAM);
3767}
3768
3769BUILTIN(builtin_getsockname) {
3770 uint fd = safe_fixnum_val(final_arg(args));
3771 sa_len = sizeof sa;
3772 chkp(getsockname(fd, &sa.sa, &sa_len));
3773 return parse_sockaddr();
3774}
3775
3776BUILTIN(builtin_getpeername) {
3777 uint fd = safe_fixnum_val(final_arg(args));
3778 sa_len = sizeof sa;
3779 chkp(getpeername(fd, &sa.sa, &sa_len));
3780 return parse_sockaddr();
3781}
3782
3783BUILTIN(builtin_connect_inet) {
3784 uint fd = safe_fixnum_val(extract_arg(&args));
3785 build_sockaddr_in(final_arg(args));
3786 chkp(connect(fd, &sa.sa, sa_len));
3787 return SC_NULL;
3788}
3789
3790BUILTIN(builtin_connect_unix) {
3791 uint fd = safe_fixnum_val(extract_arg(&args));
3792 build_sockaddr_un(final_arg(args));
3793 chkp(connect(fd, &sa.sa, sa_len));
3794 return SC_NULL;
3795}
3796
3797BUILTIN(builtin_listen) {
3798 uint fd = safe_fixnum_val(extract_arg(&args));
3799 long backlog = safe_fixnum_val(final_arg(args));
3800 if (backlog < 0) sc_error("negative backlog");
3801 if (backlog > INT_MAX) backlog = INT_MAX;
3802 chkp(listen(fd, backlog));
3803 return SC_NULL;
3804}
3805
3806BUILTIN(builtin_accept) {
3807 uint fd = safe_fixnum_val(final_arg(args));
3808 return fixnum(chkp(accept(fd, 0, 0)));
3809}
3810
3811BUILTIN(builtin_close) {
3812 chkp(close(safe_fixnum_val(final_arg(args))));
3813 return SC_NULL;
3814}
3815
3816BUILTIN(builtin_is_flonum) { return boolean(is_flonum(final_arg(args))); }
3817
3818/* NB: "if the value being converted is in the range of values that can be
3819 * represented but cannot be represented exactly, the result is either the
3820 * nearest higher or nearest lower value, chosen in an implementation-defined
3821 * manner." -C89 */
3822BUILTIN(builtin_flonum_unchecked) {
3823 assert_args(1); return flonum(fixnum_val(car(args)));
3824}
3825BUILTIN(builtin_flonum_unsigned_unchecked) {
3826 assert_args(1); return flonum(unsigned_fixnum_val(car(args)));
3827}
3828
3829BUILTIN(builtin_flo_eq_unchecked) {
3830 assert_args(2);
3831 return boolean(flonum_val(car(args)) == flonum_val(cadr(args)));
3832}
3833BUILTIN(builtin_flo_lt_unchecked) {
3834 assert_args(2);
3835 return boolean(flonum_val(car(args)) < flonum_val(cadr(args)));
3836}
3837BUILTIN(builtin_flo_le_unchecked) {
3838 assert_args(2);
3839 return boolean(flonum_val(car(args)) <= flonum_val(cadr(args)));
3840}
3841BUILTIN(builtin_flo_neg_unchecked) {
3842 assert_args(1); return flonum(-flonum_val(car(args)));
3843}
3844BUILTIN(builtin_is_flo_neg_unchecked) {
3845 assert_args(1); return boolean(flonum_val(car(args)) < 0);
3846}
3847
3848#define FLONUM_OP2(op) { \
3849 assert_args(2); \
3850 return flonum(flonum_val(car(args)) op flonum_val(cadr(args))); \
3851}
3852
3853BUILTIN(builtin_flo_add_unchecked) FLONUM_OP2(+)
3854BUILTIN(builtin_flo_sub_unchecked) FLONUM_OP2(-)
3855BUILTIN(builtin_flo_mul_unchecked) FLONUM_OP2(*)
3856BUILTIN(builtin_flo_div_unchecked) FLONUM_OP2(/)
3857
3858BUILTIN(builtin_flo_quotient_unchecked) {
3859 assert_args(2);
3860 return flonum(trunc(flonum_val(car(args)) / flonum_val(cadr(args))));
3861}
3862
3863BUILTIN(builtin_flo_remainder_unchecked) {
3864 double a, b;
3865 assert_args(2);
3866 a = flonum_val(car(args));
3867 b = flonum_val(cadr(args));
3868 return flonum(a < 0 ? -fmod(-a, fabs(b)) : fmod(a, fabs(b)));
3869}
3870
3871BUILTIN(builtin_frac_exp_unchecked) {
3872 int e;
3873 double frac;
3874 assert_args(1);
3875 frac = frexp(flonum_val(car(args)), &e);
3876 R_CDR = SC_NULL; R_CAR = fixnum(e);
3877 R_CDR = cons(); R_CAR = flonum(frac);
3878 RETURN_VALUES(cons());
3879}
3880
3881BUILTIN(builtin_load_exp_unchecked) {
3882 assert_args(2);
3883 return flonum(ldexp(flonum_val(car(args)), fixnum_val(cadr(args))));
3884}
3885
3886BUILTIN(builtin_is_inf_unchecked) {
3887 double d;
3888 assert_args(1);
3889 d = flonum_val(car(args));
3890 return boolean(d == HUGE_VAL || d == -HUGE_VAL);
3891}
3892
3893BUILTIN(builtin_flo_to_fix_unchecked) {
3894 double d;
3895 assert_args(1);
3896 d = flonum_val(car(args));
3897 assert(fabs(d) <= (double)(1L << VAL_BITS));
3898 /* ^ Catches overflow of double to long conversion, which is UB, though
3899 * not of long to fixnum (how tight the check can be made is not yet clear
3900 * to me.) */
3901 return fixnum(d);
3902}
3903
3904#define MATH_FUNC(f) { \
3905 assert_args(1); return flonum(f(flonum_val(car(args)))); \
3906}
3907BUILTIN(builtin_floor) MATH_FUNC(floor)
3908BUILTIN(builtin_ceiling) MATH_FUNC(ceil)
3909BUILTIN(builtin_truncate) MATH_FUNC(trunc)
3910BUILTIN(builtin_round) MATH_FUNC(nearbyint)
3911BUILTIN(builtin_exp) MATH_FUNC(exp)
3912BUILTIN(builtin_log) MATH_FUNC(log)
3913BUILTIN(builtin_sin) MATH_FUNC(sin)
3914BUILTIN(builtin_cos) MATH_FUNC(cos)
3915BUILTIN(builtin_tan) MATH_FUNC(tan)
3916BUILTIN(builtin_asin) MATH_FUNC(asin)
3917BUILTIN(builtin_acos) MATH_FUNC(acos)
3918BUILTIN(builtin_atan) MATH_FUNC(atan)
3919BUILTIN(builtin_atan2) {
3920 assert_args(2);
3921 return flonum(atan2(flonum_val(car(args)), flonum_val(cadr(args))));
3922}
3923BUILTIN(builtin_sqrt) MATH_FUNC(sqrt)
3924
3925BUILTIN(builtin_rev_list_to_vec_unchecked) {
3926 assert_args(1);
3927 R_EXPR = car(args);
3928 return rev_list_to_vec();
3929}
3930
3931BUILTIN(builtin_is_builtin) {
3932 return boolean(is_builtin(final_arg(args)));
3933}
3934BUILTIN(builtin_builtin_name) {
3935 value b = final_arg(args);
3936 if (!is_builtin(b)) sc_error("not a builtin");
3937 return string(builtin_name(b));
3938}
3939BUILTIN(builtin_is_promise) {
3940 return boolean(is_promise(final_arg(args)));
3941}
3942BUILTIN(builtin_is_continuation) {
3943 return boolean(is_continuation(final_arg(args)));
3944}
3945
3946BUILTIN(builtin_make_bignum) {
3947 assert_args(1);
3948 /* Returning uninitialized is safe for the garbage collector: bignums are
3949 * not scanned internally, though the words do keep their fixnum tags. Of
3950 * course, used memory is still being exposed; the privileged bignum
3951 * library is responsible for fully initializing or truncating. */
3952 return make_bignum_uninit(fixnum_val(car(args)), 0);
3953}
3954BUILTIN(builtin_is_bignum) {
3955 assert_args(1); return boolean(is_bignum(car(args)));
3956}
3957BUILTIN(builtin_is_bignum_negative) {
3958 assert_args(1); return boolean(is_bignum_negative(car(args)));
3959}
3960BUILTIN(builtin_bignum_set_negative) {
3961 assert_args(1); return bignum_set_negative(car(args));
3962}
3963BUILTIN(builtin_bignum_ref) {
3964 assert_args(2); return bignum_ref(car(args), fixnum_val(cadr(args)));
3965}
3966BUILTIN(builtin_bignum_set) {
3967 value bn;
3968 assert_args(3);
3969 bn = car(args); args = cdr(args);
3970 bignum_set(bn, fixnum_val(car(args)), cadr(args));
3971 return SC_NULL;
3972}
3973BUILTIN(builtin_bignum_length) {
3974 assert_args(1); return fixnum(bignum_len(car(args)));
3975}
3976BUILTIN(builtin_bignum_truncate) {
3977 assert_args(2); return bignum_truncate(car(args), fixnum_val(cadr(args)));
3978}
3979
3980/* Construct bignum from signed fixnum, not demoting. */
3981BUILTIN(builtin_bignum) {
3982 value bn, word, word_sign_bit, word_sign_ext;
3983 assert_args(1);
3984 /* branch-free conversion from two's complement to sign-magnitude */
3985 word = fixnum_val(car(args));
3986 word_sign_bit = word >> ((8*sizeof word)-1);
3987 word_sign_ext = ((long)word) >> ((8*sizeof word)-1);
3988 word = (word ^ word_sign_ext) + word_sign_bit;
3989 bn = make_bignum_uninit(1, word_sign_bit);
3990 bignum_set(bn, 0, fixnum(word));
3991 return bn;
3992}
3993
3994/* Construct bignum from unsigned fixnum, not demoting. */
3995BUILTIN(builtin_bignum_unsigned) {
3996 value bn, word;
3997 assert_args(1);
3998 word = car(args);
3999 bn = make_bignum_uninit(1, 0);
4000 bignum_set(bn, 0, word);
4001 return bn;
4002}
4003
4004/* Construct bignum from 2-word signed quantity, normalizing and demoting to
4005 * fixnum when possible. */
4006BUILTIN(builtin_bignum2) {
4007 value bn;
4008 long lo, hi;
4009 int neg = 0;
4010 assert_args(2);
4011 lo = fixnum_val(car(args));
4012 hi = fixnum_val(cadr(args));
4013 /* in signed fixnum range if high word is sign extension of low */
4014 if (lo >> (VAL_BITS - 1) == hi) return fixnum(lo);
4015 if (hi < 0) {
4016 /* convert from two's complement to sign-magnitude */
4017 neg = 1;
4018 /* capture carry bit in the tag by setting it to all ones prior to
4019 * complement */
4020 lo = -(lo | (-1L << VAL_BITS));
4021 hi = ~(ulong)hi + (((ulong)lo) >> VAL_BITS);
4022 }
4023 if (hi == 0) {
4024 /* need to drop high word to normalize */
4025 bn = make_bignum_uninit(1, neg);
4026 bignum_set(bn, 0, fixnum(lo));
4027 }
4028 else {
4029 /* both words significant */
4030 bn = make_bignum_uninit(2, neg);
4031 bignum_set(bn, 0, fixnum(lo));
4032 bignum_set(bn, 1, fixnum(hi));
4033 }
4034 return bn;
4035}
4036
4037
4038/****************
4039 * Initialization
4040 */
4041
4042/* Construct a builtin and define it in the top frame of R_ENV.
4043 * Side effects: R_CAR R_CDR */
4044static void add_builtin(const char *name, builtin_func_t func) {
4045 R_CAR = symbol(name);
4046 R_CDR = builtin(name, func);
4047 assert(global_frame_lookup(R_CAR, car(R_ENV)) == SC_FALSE);
4048 extend_global_env();
4049}
4050
4051/* Define a variable in the top frame of R_ENV.
4052 * Side effects: R_EXPR R_CAR R_CDR */
4053static void add_variable(const char *name, value val) {
4054 R_EXPR = val;
4055 R_CAR = symbol(name);
4056 R_CDR = R_EXPR;
4057 assert(global_frame_lookup(R_CAR, car(R_ENV)) == SC_FALSE);
4058 extend_global_env();
4059}
4060
4061/* Side effects: R_RESULT */
4062static value open_lib_file(const char *filename) {
4063 int fd = open_cloexec(filename, O_RDONLY);
4064 if (fd == -1) fatal1(filename, strerror(errno));
4065 return make_port(fd, 0, DEFAULT_R_BUF);
4066}
4067
4068uint sc_hugepages;
4069
4070void sc_init(value heap_alloc) {
4071 int mflags;
4072 assert(sizeof(value) == __SIZEOF_POINTER__);
4073 assert(sizeof(value) == sizeof(ulong));
4074
4075 mflags = MAP_PRIVATE | MAP_ANON;
4076 if (sc_hugepages) {
4077#ifdef MAP_HUGETLB
4078 mflags |= MAP_HUGETLB;
4079#else
4080 fatal("huge pages not supported");
4081#endif
4082 }
4083 heap = mmap(NULL, heap_alloc, PROT_READ | PROT_WRITE, mflags, -1, 0);
4084 if (heap == MAP_FAILED) fatal1("failed to map heap", strerror(errno));
4085 heap_size = heap_alloc / sizeof(value) / 2;
4086 new_heap = heap + heap_size;
4087
4088 gc_root(&r0);
4089 gc_root(&r1);
4090 gc_root(&r2);
4091 gc_root(&r3);
4092 gc_root(&r4);
4093 gc_root(&r5);
4094 gc_root(&r6);
4095 gc_root(&r_stack);
4096 gc_root(&r_spool);
4097 gc_root(&r_error_cont);
4098 gc_root(&r_signal_handler);
4099 gc_root(&r_compiler);
4100 gc_root(&r_compiler_expr);
4101 gc_root(&r_input_port);
4102 gc_root(&r_output_port);
4103 gc_root(&r_dump);
4104 gc_root(&stdin_port);
4105 gc_root(&stdout_port);
4106 gc_root(&symbols);
4107 gc_root(&s_lambda);
4108 gc_root(&s_quote);
4109 gc_root(&s_quasiquote);
4110 gc_root(&s_unquote);
4111 gc_root(&s_unquote_splicing);
4112 gc_root(&s_if);
4113 gc_root(&s_set);
4114 gc_root(&s_begin);
4115 gc_root(&s_letrec);
4116 gc_root(&s_define);
4117 gc_root(&s_delay);
4118 gc_root(&s_literal);
4119 gc_root(&s_open_paren);
4120 gc_root(&s_close_paren);
4121 gc_root(&s_dot);
4122 gc_root(&s_open_vector);
4123 gc_root(&s_identifier);
4124 gc_root(&s_named_char);
4125 gc_root(&s_abbrev);
4126 gc_root(&s_number);
4127 gc_root(&s_truncate);
4128 gc_root(&s_overwrite);
4129 gc_root(&s_append);
4130 gc_root(&s_sync);
4131 gc_root(&s_data_sync);
4132 gc_root(&r5rs_env);
4133 gc_root(&gscm_env);
4134 gc_root(&interaction_env);
4135 gc_root(&toplevel_env);
4136
4137 r_input_port = stdin_port = make_port(0, 0, DEFAULT_R_BUF);
4138 r_output_port = stdout_port = make_port(1, 1, DEFAULT_W_BUF);
4139 stdout_ready = 1;
4140 fixnum_zero = fixnum(0);
4141 fixnum_one = fixnum(1);
4142
4143 s_lambda = symbol("lambda");
4144 s_quote = symbol("quote");
4145 s_quasiquote = symbol("quasiquote");
4146 s_unquote = symbol("unquote");
4147 s_unquote_splicing = symbol("unquote-splicing");
4148 s_if = symbol("if");
4149 s_set = symbol("set!");
4150 s_begin = symbol("begin");
4151 s_letrec = symbol("letrec");
4152 s_define = symbol("define");
4153 s_delay = symbol("delay");
4154 s_literal = symbol("literal");
4155 s_open_paren = symbol("open-paren");
4156 s_close_paren = symbol("close-paren");
4157 s_dot = symbol("dot");
4158 s_open_vector = symbol("open-vector");
4159 s_identifier = symbol("identifier");
4160 s_named_char = symbol("named-char");
4161 s_abbrev = symbol("abbrev");
4162 s_number = symbol("number");
4163 s_truncate = symbol("truncate");
4164 s_overwrite = symbol("overwrite");
4165 s_append = symbol("append");
4166 s_sync = symbol("sync");
4167 s_data_sync = symbol("data-sync");
4168
4169 R_CAR = R_CDR = SC_NULL;
4170 R_ENV = r5rs_env = cons();
4171 add_builtin("eq?", builtin_is_eq);
4172 add_builtin("number?", builtin_is_number);
4173 add_builtin("complex?", builtin_is_number);
4174 add_builtin("real?", builtin_is_number);
4175 add_builtin("rational?", builtin_is_number);
4176 add_builtin("integer?", builtin_is_integer);
4177 add_builtin("exact?", builtin_is_exact);
4178 add_builtin("inexact?", builtin_is_inexact);
4179 add_builtin("not", builtin_not);
4180 add_builtin("boolean?", builtin_is_boolean);
4181 add_builtin("pair?", builtin_is_pair);
4182 add_builtin("cons", builtin_cons);
4183 add_builtin("car", builtin_car);
4184 add_builtin("cdr", builtin_cdr);
4185 add_builtin("caar", builtin_caar);
4186 add_builtin("cadr", builtin_cadr);
4187 add_builtin("cdar", builtin_cdar);
4188 add_builtin("cddr", builtin_cddr);
4189 add_builtin("caaar", builtin_caaar);
4190 add_builtin("caadr", builtin_caadr);
4191 add_builtin("cadar", builtin_cadar);
4192 add_builtin("caddr", builtin_caddr);
4193 add_builtin("cdaar", builtin_cdaar);
4194 add_builtin("cdadr", builtin_cdadr);
4195 add_builtin("cddar", builtin_cddar);
4196 add_builtin("cdddr", builtin_cdddr);
4197 add_builtin("caaaar", builtin_caaaar);
4198 add_builtin("caaadr", builtin_caaadr);
4199 add_builtin("caadar", builtin_caadar);
4200 add_builtin("caaddr", builtin_caaddr);
4201 add_builtin("cadaar", builtin_cadaar);
4202 add_builtin("cadadr", builtin_cadadr);
4203 add_builtin("caddar", builtin_caddar);
4204 add_builtin("cadddr", builtin_cadddr);
4205 add_builtin("cdaaar", builtin_cdaaar);
4206 add_builtin("cdaadr", builtin_cdaadr);
4207 add_builtin("cdadar", builtin_cdadar);
4208 add_builtin("cdaddr", builtin_cdaddr);
4209 add_builtin("cddaar", builtin_cddaar);
4210 add_builtin("cddadr", builtin_cddadr);
4211 add_builtin("cdddar", builtin_cdddar);
4212 add_builtin("cddddr", builtin_cddddr);
4213 add_builtin("set-car!", builtin_set_car);
4214 add_builtin("set-cdr!", builtin_set_cdr);
4215 add_builtin("null?", builtin_is_null);
4216 add_builtin("list?", builtin_is_list);
4217 add_builtin("length", builtin_length);
4218 add_builtin("symbol?", builtin_is_symbol);
4219 add_builtin("symbol->string", builtin_sym_to_str);
4220 add_builtin("string->symbol", builtin_str_to_sym);
4221 add_builtin("char?", builtin_is_char);
4222 add_builtin("char=?", builtin_char_eq);
4223 add_builtin("char<?", builtin_char_lt);
4224 add_builtin("char>?", builtin_char_gt);
4225 add_builtin("char<=?", builtin_char_le);
4226 add_builtin("char>=?", builtin_char_ge);
4227 add_builtin("char-ci=?", builtin_char_ci_eq);
4228 add_builtin("char-ci<?", builtin_char_ci_lt);
4229 add_builtin("char-ci>?", builtin_char_ci_gt);
4230 add_builtin("char-ci<=?", builtin_char_ci_le);
4231 add_builtin("char-ci>=?", builtin_char_ci_ge);
4232 add_builtin("char-alphabetic?", builtin_char_is_alpha);
4233 add_builtin("char-numeric?", builtin_char_is_num);
4234 add_builtin("char-whitespace?", builtin_char_is_white);
4235 add_builtin("char-upper-case?", builtin_char_is_upper);
4236 add_builtin("char-lower-case?", builtin_char_is_lower);
4237 add_builtin("char->integer", builtin_char_to_int);
4238 add_builtin("integer->char", builtin_int_to_char);
4239 add_builtin("char-upcase", builtin_char_upcase);
4240 add_builtin("char-downcase", builtin_char_downcase);
4241 add_builtin("string?", builtin_is_str);
4242 add_builtin("make-string", builtin_make_str);
4243 add_builtin("string-length",builtin_str_length);
4244 add_builtin("string-ref", builtin_str_ref);
4245 add_builtin("string-set!", builtin_str_set);
4246 add_builtin("string=?", builtin_str_eq);
4247 add_builtin("string<?", builtin_str_lt);
4248 add_builtin("string>?", builtin_str_gt);
4249 add_builtin("string<=?", builtin_str_le);
4250 add_builtin("string>=?", builtin_str_ge);
4251 add_builtin("string-ci=?", builtin_str_ci_eq);
4252 add_builtin("string-ci<?", builtin_str_ci_lt);
4253 add_builtin("string-ci>?", builtin_str_ci_gt);
4254 add_builtin("string-ci<=?", builtin_str_ci_le);
4255 add_builtin("string-ci>=?", builtin_str_ci_ge);
4256 add_builtin("substring", builtin_substr);
4257 add_builtin("string-append",builtin_str_append);
4258 add_builtin("list->string", builtin_list_to_str);
4259 add_builtin("string-copy", builtin_str_copy);
4260 add_builtin("string-fill!", builtin_str_fill);
4261 add_builtin("vector?", builtin_is_vector);
4262 add_builtin("make-vector", builtin_make_vector);
4263 add_builtin("vector-length",builtin_vec_length);
4264 add_builtin("vector-ref", builtin_vec_ref);
4265 add_builtin("vector-set!", builtin_vec_set);
4266 add_builtin("list->vector", builtin_list_to_vec);
4267 add_builtin("vector-fill!", builtin_vec_fill);
4268 add_builtin("procedure?", builtin_is_procedure);
4269 add_builtin("force", builtin_force);
4270 add_builtin("call-with-current-continuation", builtin_call_cc);
4271 add_builtin("call/cc", builtin_call_cc);
4272 add_builtin("values", builtin_values);
4273 add_builtin("call-with-values", builtin_call_with_values);
4274 add_builtin("eval", builtin_eval);
4275 add_builtin("scheme-report-environment", builtin_report_env);
4276 add_builtin("null-environment", builtin_null_env);
4277 add_builtin("interaction-environment", builtin_interaction_env);
4278 add_builtin("port?", builtin_is_port);
4279 add_builtin("input-port?", builtin_is_in_port);
4280 add_builtin("output-port?", builtin_is_out_port);
4281 add_builtin("current-input-port", builtin_current_in_port);
4282 add_builtin("current-output-port", builtin_current_out_port);
4283 add_builtin("open-input-file", builtin_open_in_file);
4284 add_builtin("open-output-file", builtin_open_out_file);
4285 add_builtin("close-input-port", builtin_close_in_port);
4286 add_builtin("close-output-port", builtin_close_out_port);
4287 add_builtin("read-char", builtin_read_char);
4288 add_builtin("peek-char", builtin_peek_char);
4289 add_builtin("eof-object?", builtin_is_eof);
4290 add_builtin("char-ready?", builtin_is_char_ready);
4291 add_builtin("write-char", builtin_write_char);
4292
4293 /* Immutable environment for extensions */
4294 R_CAR = SC_NULL; R_CDR = r5rs_env;
4295 R_ENV = gscm_env = cons();
4296 add_builtin("gales-scheme-environment", builtin_gscm_env);
4297 add_builtin("immutable?", builtin_is_immutable);
4298 add_builtin("cons/immutable", builtin_cons_immutable);
4299 add_builtin("string-copy/immutable", builtin_str_copy_immutable);
4300 add_builtin("vector-copy/immutable", builtin_vec_copy_immutable);
4301 add_builtin("flush-output-port", builtin_flush_out_port);
4302 add_builtin("error", builtin_error);
4303 add_builtin("gc", builtin_gc);
4304 add_variable("*fixnum-width*", fixnum(VAL_BITS));
4305 add_variable("*greatest-fixnum*", fixnum(FIXNUM_MAX));
4306 add_variable("*least-fixnum*", fixnum(FIXNUM_MIN));
4307 add_builtin("fixnum?", builtin_is_fixnum);
4308 add_builtin("fx=", builtin_fx_eq);
4309 add_builtin("fx<", builtin_fx_lt);
4310 add_builtin("fx<=", builtin_fx_le);
4311 add_builtin("fx</unsigned", builtin_fx_lt_unsigned);
4312 add_builtin("fx<=/unsigned", builtin_fx_le_unsigned);
4313 add_builtin("fx+/wrap", builtin_fx_add_wrap);
4314 add_builtin("fx+/carry", builtin_fx_add_carry);
4315 add_builtin("fx+/carry-unsigned", builtin_fx_add_carry_unsigned);
4316 add_builtin("fx-/wrap", builtin_fx_sub_wrap);
4317 add_builtin("fx-/borrow-unsigned", builtin_fx_sub_borrow_unsigned);
4318 add_builtin("fx*/wrap", builtin_fx_mul_wrap);
4319 add_builtin("fx*/carry", builtin_fx_mul_carry);
4320 add_builtin("fx*/carry-unsigned", builtin_fx_mul_carry_unsigned);
4321 add_builtin("fxnot", builtin_fxnot);
4322 add_builtin("fxand", builtin_fxand);
4323 add_builtin("fxior", builtin_fxior);
4324 add_builtin("fxxor", builtin_fxxor);
4325 add_builtin("fxif", builtin_fxif);
4326 add_builtin("fxmaj", builtin_fxmaj);
4327 add_builtin("fxshift", builtin_fxshift);
4328 add_builtin("fxshift/unsigned", builtin_fxshift_unsigned);
4329 add_builtin("fxlength/unsigned", builtin_fxlength_unsigned);
4330 add_builtin("open-subprocess", builtin_open_subprocess);
4331 add_builtin("wait-subprocess", builtin_wait_subprocess);
4332 add_builtin("read-token", builtin_read_token);
4333 add_builtin("write-string", builtin_write_string);
4334 add_builtin("write-string/quoted", builtin_write_string_quoted);
4335
4336 /* The interaction environment is a mutable copy of the Scheme report
4337 * environment plus extensions */
4338 R_EXPR = car(r5rs_env);
4339 R_CAR = copy_global_frame(); R_CDR = SC_NULL;
4340 interaction_env = cons();
4341 /* XXX there's probably no reason for these to be separate frames */
4342 R_EXPR = car(gscm_env);
4343 R_CAR = copy_global_frame(); R_CDR = interaction_env;
4344 interaction_env = cons();
4345
4346 /* Privileged environment for the compiler and toplevel code */
4347 R_CAR = SC_NULL; R_CDR = gscm_env;
4348 R_ENV = toplevel_env = cons();
4349 add_builtin("toplevel-environment", builtin_toplevel_env);
4350 add_variable("*max-parameters*", fixnum(EXT_LENGTH_MAX >> 1));
4351 /* ^ sign-encoded arity must fit in procedure header; frame index must fit
4352 * in variable ref header */
4353 add_builtin("define-r5rs", builtin_define_r5rs);
4354 add_builtin("define-gscm", builtin_define_gscm);
4355 add_builtin("set-input-port!", builtin_set_in_port);
4356 add_builtin("set-output-port!", builtin_set_out_port);
4357 add_builtin("push-winding!", builtin_push_winding);
4358 add_builtin("variable-ref", builtin_variable_ref);
4359 add_builtin("apply/unchecked", builtin_apply_unchecked);
4360 add_builtin("car/unchecked", builtin_car_unchecked);
4361 add_builtin("cdr/unchecked", builtin_cdr_unchecked);
4362 add_builtin("set-car/unchecked!", builtin_set_car_unchecked);
4363 add_builtin("set-cdr/unchecked!", builtin_set_cdr_unchecked);
4364 add_builtin("string-ref/unchecked", builtin_str_ref_unchecked);
4365 add_builtin("vector-ref/unchecked", builtin_vec_ref_unchecked);
4366 add_builtin("fx+/unchecked", builtin_fx_add_unchecked);
4367 add_builtin("fx-/unchecked", builtin_fx_sub_unchecked);
4368 add_builtin("fx=/unchecked", builtin_fx_eq_unchecked);
4369 add_builtin("fx</unchecked", builtin_fx_lt_unchecked);
4370 add_builtin("fx<=/unchecked", builtin_fx_le_unchecked);
4371 add_builtin("fxneg/unchecked", builtin_fx_neg_unchecked);
4372 add_builtin("fxnegative/unchecked?", builtin_is_fx_neg_unchecked);
4373 add_builtin("fxdiv/unsigned/unchecked", builtin_fx_div_unsigned_unchecked);
4374 add_builtin("fxdiv/ext/unsigned/unchecked",
4375 builtin_fx_div_ext_unsigned_unchecked);
4376 add_builtin("fixnum->dec/unchecked", builtin_fixnum_to_dec_unchecked);
4377 add_builtin("fixnum->hex/unchecked", builtin_fixnum_to_hex_unchecked);
4378 add_builtin("fixnum->oct/unchecked", builtin_fixnum_to_oct_unchecked);
4379 add_builtin("fixnum->bin/unchecked", builtin_fixnum_to_bin_unchecked);
4380 add_builtin("fixnum->bin/unsigned/unchecked",
4381 builtin_fixnum_to_bin_unsigned_unchecked);
4382 add_builtin("flonum->dec/unchecked", builtin_flonum_to_dec_unchecked);
4383 add_builtin("set-error-continuation!", builtin_set_err_cont);
4384 add_builtin("inet-stream-socket", builtin_inet_stream_sock);
4385 add_builtin("inet-dgram-socket", builtin_inet_dgram_sock);
4386 add_builtin("unix-stream-socket", builtin_unix_stream_sock);
4387 add_builtin("unix-dgram-socket", builtin_unix_dgram_sock);
4388 add_builtin("socket-ports", builtin_socket_ports);
4389 add_builtin("getsockname", builtin_getsockname);
4390 add_builtin("getpeername", builtin_getpeername);
4391 add_builtin("connect-inet", builtin_connect_inet);
4392 add_builtin("connect-unix", builtin_connect_unix);
4393 add_builtin("listen", builtin_listen);
4394 add_builtin("accept", builtin_accept);
4395 add_builtin("close", builtin_close);
4396 add_builtin("flonum?", builtin_is_flonum);
4397 add_builtin("flonum/unchecked", builtin_flonum_unchecked);
4398 add_builtin("flonum/unsigned/unchecked", builtin_flonum_unsigned_unchecked);
4399 add_builtin("flo=/unchecked", builtin_flo_eq_unchecked);
4400 add_builtin("flo</unchecked", builtin_flo_lt_unchecked);
4401 add_builtin("flo<=/unchecked", builtin_flo_le_unchecked);
4402 add_builtin("floneg/unchecked", builtin_flo_neg_unchecked);
4403 add_builtin("flonegative/unchecked?", builtin_is_flo_neg_unchecked);
4404 add_builtin("flo+/unchecked", builtin_flo_add_unchecked);
4405 add_builtin("flo-/unchecked", builtin_flo_sub_unchecked);
4406 add_builtin("flo*/unchecked", builtin_flo_mul_unchecked);
4407 add_builtin("flodiv/unchecked", builtin_flo_div_unchecked);
4408 add_builtin("floquotient/unchecked", builtin_flo_quotient_unchecked);
4409 add_builtin("floremainder/unchecked", builtin_flo_remainder_unchecked);
4410 add_builtin("fraction/exponent/unchecked", builtin_frac_exp_unchecked);
4411 add_builtin("load-exponent/unchecked", builtin_load_exp_unchecked);
4412 add_builtin("inf/unchecked?", builtin_is_inf_unchecked);
4413 add_builtin("flonum->fixnum/unchecked", builtin_flo_to_fix_unchecked);
4414 add_builtin("floor/unchecked", builtin_floor);
4415 add_builtin("ceiling/unchecked", builtin_ceiling);
4416 add_builtin("truncate/unchecked", builtin_truncate);
4417 add_builtin("round/unchecked", builtin_round);
4418 add_builtin("exp/unchecked", builtin_exp);
4419 add_builtin("log/unchecked", builtin_log);
4420 add_builtin("sin/unchecked", builtin_sin);
4421 add_builtin("cos/unchecked", builtin_cos);
4422 add_builtin("tan/unchecked", builtin_tan);
4423 add_builtin("asin/unchecked", builtin_asin);
4424 add_builtin("acos/unchecked", builtin_acos);
4425 add_builtin("atan/unchecked", builtin_atan);
4426 add_builtin("atan2/unchecked", builtin_atan2);
4427 add_builtin("sqrt/unchecked", builtin_sqrt);
4428 add_builtin("reverse-list->vector/unchecked", builtin_rev_list_to_vec_unchecked);
4429 add_builtin("builtin?", builtin_is_builtin);
4430 add_builtin("builtin-name", builtin_builtin_name);
4431 add_builtin("promise?", builtin_is_promise);
4432 add_builtin("continuation?", builtin_is_continuation);
4433 add_builtin("make-bignum", builtin_make_bignum);
4434 add_builtin("bignum?", builtin_is_bignum);
4435 add_builtin("bignum-negative?", builtin_is_bignum_negative);
4436 add_builtin("bignum-set-negative!", builtin_bignum_set_negative);
4437 add_builtin("bignum-ref", builtin_bignum_ref);
4438 add_builtin("bignum-set!", builtin_bignum_set);
4439 add_builtin("bignum-length", builtin_bignum_length);
4440 add_builtin("bignum", builtin_bignum);
4441 add_builtin("bignum/unsigned", builtin_bignum_unsigned);
4442 add_builtin("bignum2", builtin_bignum2);
4443 add_builtin("bignum-truncate!", builtin_bignum_truncate);
4444
4445 R_PORT = open_lib_file(GSCMLIB "/compiler.scm");
4446 err_context = "compiler";
4447 r_compiler_expr = sc_read();
4448 if (r_compiler_expr == SC_EOF) fatal("EOF reading compiler code");
4449 close_port(R_PORT);
4450 R_EXPR = r_compiler_expr;
4451 R_ENV = toplevel_env;
4452 evaluator();
4453 r_compiler = R_RESULT;
4454 /* Self-compile, for the speed benefit of variable refs */
4455 R_EXPR = r_compiler_expr;
4456 R_ENV = toplevel_env;
4457 r_compiler_expr = SC_NULL;
4458 evaluator();
4459 r_compiler = R_RESULT;
4460}
4461
4462int sc_toplevel(int argc, char **argv) {
4463 int i;
4464 R_CDR = SC_NULL;
4465 for (i=argc-1; i>=0; --i) {
4466 R_CAR = string(argv[i]);
4467 R_CDR = cons();
4468 }
4469 R_ENV = interaction_env;
4470 add_variable("*args*", R_CDR);
4471
4472 R_PORT = open_lib_file(GSCMLIB "/toplevel.scm");
4473 err_context = "toplevel";
4474 R_EXPR = sc_read();
4475 if (R_EXPR == SC_EOF) fatal("EOF reading toplevel code");
4476 close_port(R_PORT);
4477 R_ENV = toplevel_env;
4478 evaluator();
4479 flush_all();
4480 return fixnum_val(R_RESULT);
4481}