keithlisp/parse.c

333 lines
8.4 KiB
C

#include "parse.h"
#define lisp_tokenize_len 64
char lisp_tokenize_buf[lisp_tokenize_len];
int lisp_tokenize_pos;
bool lisp_tokenize_quote_mode;
bool lisp_tokenize_esc_mode;
void lisp_tokenize_init() {
lisp_tokenize_buf[0] = '\0';
lisp_tokenize_pos = 0;
lisp_tokenize_quote_mode = false;
lisp_tokenize_esc_mode = false;
}
void lisp_tokenize_next(lisp_cons** head) {
lisp_tokenize_buf[lisp_tokenize_pos] = '\0';
lisp_string* token_lstr = lisp_string_create(lisp_tokenize_buf);
lisp_tokenize_buf[0] = '\0';
lisp_tokenize_pos = 0;
lisp_cons* tail = dbg_malloc(sizeof(lisp_cons));
tail->car.type = LISP_T_STRING;
tail->car.value.string = token_lstr;
tail->cdr.type = LISP_T_CONS;
tail->cdr.value.cons = NULL;
if (*head != NULL)
lisp_cons_join(*head, tail);
else
*head = tail;
}
void lisp_tokenize(char* buf, int len, lisp_cons** head) {
for (int i = 0; i < len; i++) {
if (lisp_tokenize_pos >= lisp_tokenize_len-1) {
lisp_tokenize_pos = lisp_tokenize_len-1;
lisp_tokenize_next(head);
}
char c = buf[i];
if (lisp_tokenize_quote_mode) {
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
if (c == '\\')
lisp_tokenize_esc_mode = true;
else if ((c == '"') & !lisp_tokenize_esc_mode) { // end of string
lisp_tokenize_next(head);
lisp_tokenize_quote_mode = false;
} else
lisp_tokenize_esc_mode = false;
continue;
}
if (lisp_tokenize_esc_mode) {
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
lisp_tokenize_esc_mode = false;
continue;
}
if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\v') || (c == '\f')) {
if (lisp_tokenize_pos > 0) // whitespace, end of token
lisp_tokenize_next(head);
} else if (c == ')') {
// end current token
if (lisp_tokenize_pos > 0)
lisp_tokenize_next(head);
// ) is its own token
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
lisp_tokenize_next(head);
} else if (c == '(') {
// ( is its own token
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
lisp_tokenize_next(head);
} else if ((c == '"') && (lisp_tokenize_pos == 0)) {
// enable quotes mode
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
lisp_tokenize_quote_mode = true;
} else if (c == '\\') {
lisp_tokenize_esc_mode = true;
} else {
lisp_tokenize_buf[lisp_tokenize_pos++] = c;
}
}
}
bool lisp_parse_number(lisp_string* token_lstr, lisp_value* value) {
int token_len = lisp_string_len(token_lstr);
char* token_data = lisp_string_data(token_lstr);
bool neg = false;
bool exp_neg = false;
bool exp_mode = false;
bool point = false;
bool valid = false;
long _int = 0;
float _float = 0.0f;
float weight = 0.1f;
int exp_i = 0;
int exp = 0;
int base = 10;
for (int i = 0; i < token_len; i++) {
char c = token_data[i];
if (c >= 'a')
c = c - ('a' - 'A');
if (!exp_mode) {
if (c == '-' && i == 0) {
neg = true;
} else if (c >= '0' && c <= '9') {
valid = true;
if (!point)
_int = (_int * base) + (c - '0');
else {
_float += ((c - '0') * weight);
weight /= 10.0f;
}
} else if (c >= 'A' && c <= 'F' && base == 16) {
valid = true;
_int = (_int * base) + (c - ('A' - 10));
} else if (c == 'X') {
base = 16;
} else if (c == '.' && !point) {
point = true;
_float = (float) _int;
} else if (c == 'E') {
valid = false;
exp_mode = true;
if (!point)
_float = (float) _int;
point = true;
exp_i = i+1;
} else {
valid = false;
break;
}
} else {
if (c == '-' && i == exp_i) {
exp_neg = true;
} else if (c >= '0' && c <= '9') {
valid = true;
exp = (exp * 10) + (c - '0');
} else {
valid = false;
break;
}
}
}
if (valid && !point) {
if (neg)
_int = -_int;
value->type = LISP_T_INT;
value->value._int = _int;
return true;
} else if (valid) {
if (neg)
_float = -_float;
if (exp_neg)
exp = -exp;
value->type = LISP_T_FLOAT;
value->value._float = _float;
if (exp_mode)
value->value._float *= powf(10, exp);
return true;
}
return false;
}
bool lisp_parse_string(lisp_string* token_lstr, lisp_value* value) {
int token_len = lisp_string_len(token_lstr);
char* token_data = lisp_string_data(token_lstr);
if (token_data[0] != '"')
return false;
lisp_string* lstr = lisp_string_alloc(lisp_string_len(token_lstr) - 2);
int lstr_len = 0;
char* lstr_data = lisp_string_data(lstr);
bool esc_mode = false;
char esc_hex_mode = 0;
char esc_hex = 0;
for (int i = 1; i < token_len-1; i++) {
char c = token_data[i];
if (esc_mode) {
esc_mode = false;
switch (c) {
case 'e':
lstr_data[lstr_len++] = '\e';
break;
case 'r':
lstr_data[lstr_len++] = '\r';
break;
case 'n':
lstr_data[lstr_len++] = '\n';
break;
case 't':
lstr_data[lstr_len++] = '\t';
break;
case 'f':
lstr_data[lstr_len++] = '\f';
break;
case 'v':
lstr_data[lstr_len++] = '\v';
break;
case 'x':
esc_hex_mode = 2;
esc_hex = 0;
break;
default:
lstr_data[lstr_len++] = c;
break;
}
} else if (esc_hex_mode > 0) {
esc_hex_mode--;
esc_hex <<= 4;
if (c <= '9')
esc_hex += (c - '0');
else if (c <= 'F')
esc_hex += (c - ('A'-10));
else
esc_hex += (c - ('a'-10));
if (esc_hex_mode == 0)
lstr_data[lstr_len++] = esc_hex;
} else {
if (c == '\\') {
esc_mode = true;
} else {
lstr_data[lstr_len++] = c;
}
}
}
*((int*) lstr) = lstr_len;
value->type = LISP_T_STRING;
value->value.string = lstr;
return true;
}
bool lisp_parse_scalar(lisp_string* token_lstr, lisp_value* value) {
char* buf = lisp_string_data(token_lstr);
if (buf[0] == '(' || buf[0] == ')' || buf[0] == '\'') // parentheses, quote
return false;
if (lisp_string_len(token_lstr) == 3 && buf[0] == 'n' && buf[1] == 'i' && buf[2] == 'l') { // nil
value->type = LISP_T_CONS;
value->value.cons = NULL;
return true;
}
if (lisp_parse_string(token_lstr, value))
return true;
if (lisp_parse_number(token_lstr, value))
return true;
lisp_atom atom = lisp_atomize(token_lstr);
value->type = LISP_T_ATOM;
value->value.atom = atom;
return true;
}
bool lisp_parse_recursive(lisp_cons** tokens, bool (*fetch_tokens)(lisp_cons**), lisp_value* value) {
while (*tokens == NULL) // fetch more tokens
if ((*fetch_tokens)(tokens) == false)
return false;
lisp_cons* cons = lisp_cons_pop(tokens);
lisp_string* token_lstr = cons->car.value.string;
char* buf = lisp_string_data(token_lstr);
if (buf[0] == '\'') {
lisp_string* new_lstr = lisp_string_create_raw(lisp_string_data(token_lstr)+1, lisp_string_len(token_lstr)-1);
dbg_free(token_lstr);
cons->car.value.string = new_lstr;
*tokens = cons;
lisp_cons* quote_cons = dbg_malloc(sizeof(lisp_cons));
quote_cons->car.type = LISP_T_ATOM;
quote_cons->car.value.atom = atom_quote;
quote_cons->cdr.type = LISP_T_CONS;
quote_cons->cdr.value.cons = dbg_malloc(sizeof(lisp_cons));
quote_cons->cdr.value.cons->cdr.type = LISP_T_CONS;
quote_cons->cdr.value.cons->cdr.value.cons = NULL;
if (!lisp_parse_recursive(tokens, fetch_tokens, &quote_cons->cdr.value.cons->car))
return false;
value->type = LISP_T_CONS;
value->value.cons = quote_cons;
return true;
}
if (buf[0] == '(') {
buf = NULL;
dbg_free(token_lstr);
lisp_cons* head = NULL;
lisp_cons* tail = NULL;
lisp_cons* new_cons = cons;
bool cdr_mode = false;
while (lisp_parse_recursive(tokens, fetch_tokens, &new_cons->car)) {
new_cons->cdr.type = LISP_T_CONS;
new_cons->cdr.value.cons = NULL;
if (head == NULL)
head = new_cons;
if (tail == NULL)
tail = head;
else {
if (cdr_mode) {
tail->cdr = new_cons->car;
dbg_free(new_cons);
} else {
tail->cdr.value.cons = new_cons;
tail = new_cons;
}
}
new_cons = dbg_malloc(sizeof(lisp_cons));
while (*tokens == NULL) // fetch more tokens
if ((*fetch_tokens)(tokens) == false)
return false;
token_lstr = (*tokens)->car.value.string;
if (lisp_string_len(token_lstr) == 1) {
if (lisp_string_data(token_lstr)[0] == ')') {
cons = lisp_cons_pop(tokens);
dbg_free(cons);
dbg_free(token_lstr);
break;
} else if (lisp_string_data(token_lstr)[0] == '.') {
cons = lisp_cons_pop(tokens);
dbg_free(cons);
dbg_free(token_lstr);
cdr_mode = true;
}
}
}
dbg_free(new_cons);
value->type = LISP_T_CONS;
value->value.cons = head;
return true;
}
if (lisp_parse_scalar(token_lstr, value)) {
dbg_free(cons);
dbg_free(token_lstr);
return true;
}
*tokens = cons;
return false;
}