2021-09-22 13:54:29 +00:00
|
|
|
#include "parse.h"
|
|
|
|
|
|
|
|
/* Capacity in bytes of lisp_tokenize_buf; the tokenizer flushes a token once
 * lisp_tokenize_len-1 characters are pending, leaving one slot for the NUL. */
#define lisp_tokenize_len 64
|
|
|
|
/* Accumulator for the characters of the token currently being scanned. */
char lisp_tokenize_buf[lisp_tokenize_len];
|
|
|
|
/* Number of characters pending in lisp_tokenize_buf (index of the next free slot). */
int lisp_tokenize_pos;
|
|
|
|
/* True while the tokenizer is inside a double-quoted string token. */
bool lisp_tokenize_quote_mode;
|
|
|
|
/* True when the previously scanned character was a backslash that escapes the next one. */
bool lisp_tokenize_esc_mode;
|
|
|
|
void lisp_tokenize_init() {
|
|
|
|
lisp_tokenize_buf[0] = '\0';
|
|
|
|
lisp_tokenize_pos = 0;
|
|
|
|
lisp_tokenize_quote_mode = false;
|
|
|
|
lisp_tokenize_esc_mode = false;
|
|
|
|
}
|
|
|
|
/*
 * Turn the characters pending in lisp_tokenize_buf into a token and add it
 * to the token list.
 *
 * head: address of the list head. When the list is empty (*head == NULL)
 *       the new cell becomes the head; otherwise the cell is handed to
 *       lisp_cons_join() together with the current head.
 *
 * The accumulator is emptied afterwards so the next token starts at index 0.
 */
void lisp_tokenize_next(lisp_cons** head) {
    /* NUL-terminate the pending characters and copy them into a lisp string. */
    lisp_tokenize_buf[lisp_tokenize_pos] = '\0';
    lisp_string *text = lisp_string_create(lisp_tokenize_buf);

    /* The accumulator is free again. */
    lisp_tokenize_pos = 0;
    lisp_tokenize_buf[0] = '\0';

    /* Wrap the string in a fresh list cell whose cdr is the empty list. */
    lisp_cons *node = dbg_malloc(sizeof *node);
    node->cdr.value.cons = NULL;
    node->cdr.type = LISP_T_CONS;
    node->car.value.string = text;
    node->car.type = LISP_T_STRING;

    if (*head == NULL) {
        *head = node;
        return;
    }
    lisp_cons_join(*head, node);
}
|
|
|
|
/*
 * Feed `len` characters from `buf` into the incremental tokenizer.
 *
 * Completed tokens are emitted through lisp_tokenize_next(), which adds them
 * to the list at *head. Scanner state lives in the lisp_tokenize_* globals,
 * so a token (or string literal, or escape) may span several calls.
 *
 * Token rules:
 *   - whitespace ends the pending token;
 *   - ')' ends the pending token and is emitted as a token of its own;
 *   - '(' is appended to whatever is pending (e.g. a leading quote) and the
 *     result is emitted immediately;
 *   - '"' at the start of a token opens a string literal, which runs up to
 *     and including the next unescaped '"';
 *   - outside a string, a backslash is dropped and the following character
 *     is taken literally; inside a string the backslash is kept so the
 *     string parser can interpret the escape later.
 *
 * A token longer than lisp_tokenize_len-1 characters is split: the pending
 * part is flushed as a token and scanning continues in a fresh one.
 */
void lisp_tokenize(char* buf, int len, lisp_cons** head) {
    for (int i = 0; i < len; i++) {
        // accumulator full: flush so the single write below stays in bounds
        if (lisp_tokenize_pos >= lisp_tokenize_len-1) {
            lisp_tokenize_pos = lisp_tokenize_len-1;
            lisp_tokenize_next(head);
        }

        char c = buf[i];

        if (lisp_tokenize_quote_mode) {
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
            if (lisp_tokenize_esc_mode) {
                // this character is escaped, whatever it is (including a
                // second backslash or a quote); it consumes the escape
                lisp_tokenize_esc_mode = false;
            } else if (c == '\\') {
                lisp_tokenize_esc_mode = true;
            } else if (c == '"') { // end of string
                lisp_tokenize_next(head);
                lisp_tokenize_quote_mode = false;
            }
            continue;
        }

        if (lisp_tokenize_esc_mode) {
            // character after a backslash: taken literally
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
            lisp_tokenize_esc_mode = false;
            continue;
        }

        if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\v') || (c == '\f')) {
            if (lisp_tokenize_pos > 0) // whitespace, end of token
                lisp_tokenize_next(head);
        } else if (c == ')') {
            // end current token
            if (lisp_tokenize_pos > 0)
                lisp_tokenize_next(head);
            // ) is its own token
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
            lisp_tokenize_next(head);
        } else if (c == '(') {
            // ( closes the token; anything pending (such as ') stays in front of it
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
            lisp_tokenize_next(head);
        } else if ((c == '"') && (lisp_tokenize_pos == 0)) {
            // enable quotes mode
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
            lisp_tokenize_quote_mode = true;
        } else if (c == '\\') {
            lisp_tokenize_esc_mode = true;
        } else {
            lisp_tokenize_buf[lisp_tokenize_pos++] = c;
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Try to interpret a token as a numeric literal.
 *
 * Accepted forms (letters are case-insensitive):
 *   integer      -?digits                e.g. 42, -7
 *   hexadecimal  -?0x hexdigits          e.g. 0x1F, -0x10
 *   float        -?digits?.digits?       e.g. 1.5, .5, 3.
 *   exponent     mantissa e -?digits     e.g. 1e3, 2.5e-2
 *
 * token_lstr: the token text.
 * value:      receives LISP_T_INT or LISP_T_FLOAT and the parsed number
 *             when the token is numeric; left untouched otherwise.
 *
 * Returns true when the token was a number, false otherwise (the caller
 * then treats it as something else, e.g. a symbol).
 *
 * The hex prefix is only recognised as "0x" directly after the optional
 * sign, and an exponent requires at least one mantissa digit, so symbols
 * such as x1 or e5 are not mistaken for numbers.
 *
 * NOTE(review): the integer accumulator is a plain long with no overflow
 * check; very long digit strings overflow it.
 */
bool lisp_parse_number(lisp_string* token_lstr, lisp_value* value) {
    int token_len = lisp_string_len(token_lstr);
    char* token_data = lisp_string_data(token_lstr);

    bool neg = false;        // mantissa sign
    bool exp_neg = false;    // exponent sign
    bool exp_mode = false;   // currently reading the exponent
    bool point = false;      // result is a float (saw '.' or 'e')
    bool valid = false;      // at least one digit in the part being read
    long int_part = 0;
    float float_part = 0.0f;
    float weight = 0.1f;     // place value of the next fractional digit
    int exp_i = 0;           // index of the first exponent character
    int exponent = 0;
    int base = 10;
    int int_digits = 0;      // integer digits seen so far

    for (int i = 0; i < token_len; i++) {
        char c = token_data[i];
        // fold letters to upper case so 0x/0X, e/E and a-f/A-F are equivalent
        if (c >= 'a' && c <= 'z')
            c = c - ('a' - 'A');

        if (!exp_mode) {
            if (c == '-' && i == 0) {
                neg = true;
            } else if (c >= '0' && c <= '9') {
                valid = true;
                if (!point) {
                    int_part = (int_part * base) + (c - '0');
                    int_digits++;
                } else {
                    float_part += ((c - '0') * weight);
                    weight /= 10.0f;
                }
            } else if (c >= 'A' && c <= 'F' && base == 16) {
                // must come before the 'E' test: in hex, E is a digit
                valid = true;
                int_part = (int_part * base) + (c - ('A' - 10));
            } else if (c == 'X' && base == 10 && !point && int_digits == 1 && int_part == 0) {
                // "0x" prefix; a bare prefix with no hex digits is not a number
                base = 16;
                valid = false;
            } else if (c == '.' && !point && base == 10) {
                point = true;
                float_part = (float) int_part;
            } else if (c == 'E' && valid) {
                // exponent marker: needs digits of its own to be valid again
                valid = false;
                exp_mode = true;
                if (!point)
                    float_part = (float) int_part;
                point = true;
                exp_i = i+1;
            } else {
                valid = false;
                break;
            }
        } else {
            if (c == '-' && i == exp_i) {
                exp_neg = true;
            } else if (c >= '0' && c <= '9') {
                valid = true;
                exponent = (exponent * 10) + (c - '0');
            } else {
                valid = false;
                break;
            }
        }
    }

    if (!valid)
        return false;

    if (!point) {
        if (neg)
            int_part = -int_part;
        value->type = LISP_T_INT;
        value->value._int = int_part;
        return true;
    }

    if (neg)
        float_part = -float_part;
    if (exp_neg)
        exponent = -exponent;
    value->type = LISP_T_FLOAT;
    value->value._float = float_part;
    if (exp_mode)
        value->value._float *= powf(10, exponent);
    return true;
}
|
|
|
|
/*
 * Try to interpret a token as a double-quoted string literal.
 *
 * token_lstr: the token text, including its surrounding quotes.
 * value:      on success receives LISP_T_STRING and a newly allocated
 *             lisp_string holding the decoded contents.
 *
 * Returns false (leaving *value untouched) when the token does not start
 * with '"' or is too short to have both quotes; true otherwise.
 *
 * The first and last characters of the token are dropped as the quotes.
 * Escapes decoded: \e \r \n \t \f \v, \xHH (two hex digits), and \<c> for
 * any other character, which yields <c> itself (so \\ and \" work).
 * Each input character produces at most one output character, so the
 * token_len-2 bytes requested from lisp_string_alloc() always suffice.
 *
 * NOTE(review): hex digits after \x are not validated; a non-hex character
 * contributes an arbitrary value rather than being rejected.
 */
bool lisp_parse_string(lisp_string* token_lstr, lisp_value* value) {
    int token_len = lisp_string_len(token_lstr);
    char* token_data = lisp_string_data(token_lstr);

    // need at least the two quotes; otherwise token_len-2 would be negative
    if (token_len < 2 || token_data[0] != '"')
        return false;

    lisp_string* lstr = lisp_string_alloc(token_len - 2);
    int lstr_len = 0;
    char* lstr_data = lisp_string_data(lstr);

    bool esc_mode = false;       // previous character was a backslash
    int esc_hex_left = 0;        // hex digits still expected after \x
    unsigned int esc_hex = 0;    // unsigned: shifting must stay well defined

    for (int i = 1; i < token_len-1; i++) {
        char c = token_data[i];
        if (esc_mode) {
            esc_mode = false;
            switch (c) {
                case 'e':
                    lstr_data[lstr_len++] = '\033'; // ESC; '\e' is not standard C
                    break;
                case 'r':
                    lstr_data[lstr_len++] = '\r';
                    break;
                case 'n':
                    lstr_data[lstr_len++] = '\n';
                    break;
                case 't':
                    lstr_data[lstr_len++] = '\t';
                    break;
                case 'f':
                    lstr_data[lstr_len++] = '\f';
                    break;
                case 'v':
                    lstr_data[lstr_len++] = '\v';
                    break;
                case 'x':
                    esc_hex_left = 2;
                    esc_hex = 0;
                    break;
                default:
                    lstr_data[lstr_len++] = c;
                    break;
            }
        } else if (esc_hex_left > 0) {
            esc_hex_left--;
            esc_hex <<= 4;
            if (c <= '9')
                esc_hex += (unsigned int) (c - '0');
            else if (c <= 'F')
                esc_hex += (unsigned int) (c - ('A'-10));
            else
                esc_hex += (unsigned int) (c - ('a'-10));

            if (esc_hex_left == 0)
                lstr_data[lstr_len++] = (char) (esc_hex & 0xFFu);
        } else {
            if (c == '\\') {
                esc_mode = true;
            } else {
                lstr_data[lstr_len++] = c;
            }
        }
    }

    // Escapes shrink the text, so store the real decoded length.
    // NOTE(review): this writes through an int* cast, which presumably relies
    // on the length being the first field of lisp_string - confirm.
    *((int*) lstr) = lstr_len;
    value->type = LISP_T_STRING;
    value->value.string = lstr;
    return true;
}
|
|
|
|
/*
 * Convert a single non-structural token into a value.
 *
 * Returns false, without touching *value, for tokens that begin with
 * '(', ')' or a single quote. Every other token succeeds; the first
 * matching rule wins:
 *   1. the exact text "nil"      -> LISP_T_CONS with a NULL cons
 *   2. lisp_parse_string accepts -> whatever it stored
 *   3. lisp_parse_number accepts -> whatever it stored
 *   4. anything else             -> LISP_T_ATOM via lisp_atomize()
 */
bool lisp_parse_scalar(lisp_string* token_lstr, lisp_value* value) {
    char *text = lisp_string_data(token_lstr);

    switch (text[0]) {
        case '(':   // parentheses and quote are handled by the caller
        case ')':
        case '\'':
            return false;
        default:
            break;
    }

    bool is_nil = lisp_string_len(token_lstr) == 3
               && text[0] == 'n' && text[1] == 'i' && text[2] == 'l';
    if (is_nil) {
        value->value.cons = NULL;
        value->type = LISP_T_CONS;
        return true;
    }

    // literals take precedence over symbols
    if (lisp_parse_string(token_lstr, value) || lisp_parse_number(token_lstr, value))
        return true;

    value->value.atom = lisp_atomize(token_lstr);
    value->type = LISP_T_ATOM;
    return true;
}
|
|
|
|
|
|
|
|
/*
 * Parse one expression from the token list into *value.
 *
 * tokens:       address of the head of the pending token list. Tokens are
 *               popped as they are consumed; each consumed cell and its
 *               string are released with dbg_free().
 * fetch_tokens: called whenever the list is empty to obtain more tokens;
 *               returning false means no more input is available.
 * value:        receives the parsed expression on success.
 *
 * Returns true when an expression was stored in *value. Returns false when
 * input ran out, or when the next token is a ')' (the token is pushed back
 * onto *tokens so the enclosing list can see it).
 *
 * Forms handled:
 *   'X        -> the two-element list (quote X)
 *   ( ... )   -> a list; "a . b" inside it stores b as the cdr of the
 *                last cell instead of appending a new cell
 *   otherwise -> a scalar via lisp_parse_scalar()
 *
 * NOTE(review): when input runs out in the middle of a list the cells
 * already built are not released here; freeing them needs a list
 * destructor that is not visible from this function.
 */
bool lisp_parse_recursive(lisp_cons** tokens, bool (*fetch_tokens)(lisp_cons**), lisp_value* value) {
    while (*tokens == NULL) // fetch more tokens
        if ((*fetch_tokens)(tokens) == false)
            return false;

    lisp_cons* cons = lisp_cons_pop(tokens);
    lisp_string* token_lstr = cons->car.value.string;
    char* buf = lisp_string_data(token_lstr);

    if (buf[0] == '\'') {
        if (lisp_string_len(token_lstr) > 1) {
            // strip the leading quote and push the remainder back as the next token
            lisp_string* new_lstr = lisp_string_create_raw(lisp_string_data(token_lstr)+1, lisp_string_len(token_lstr)-1);
            dbg_free(token_lstr);
            cons->car.value.string = new_lstr;
            *tokens = cons;
        } else {
            // a quote on its own: nothing to push back, the quoted
            // expression is simply whatever token comes next
            dbg_free(token_lstr);
            dbg_free(cons);
        }

        // build (quote <expr>): two cells, the second one holds the expression
        lisp_cons* quote_cons = dbg_malloc(sizeof(lisp_cons));
        quote_cons->car.type = LISP_T_ATOM;
        quote_cons->car.value.atom = atom_quote;
        quote_cons->cdr.type = LISP_T_CONS;
        quote_cons->cdr.value.cons = dbg_malloc(sizeof(lisp_cons));
        quote_cons->cdr.value.cons->cdr.type = LISP_T_CONS;
        quote_cons->cdr.value.cons->cdr.value.cons = NULL;
        if (!lisp_parse_recursive(tokens, fetch_tokens, &quote_cons->cdr.value.cons->car)) {
            // nothing to quote: release the half-built form
            dbg_free(quote_cons->cdr.value.cons);
            dbg_free(quote_cons);
            return false;
        }
        value->type = LISP_T_CONS;
        value->value.cons = quote_cons;
        return true;
    }

    if (buf[0] == '(') {
        buf = NULL;
        dbg_free(token_lstr);

        lisp_cons* head = NULL;
        lisp_cons* tail = NULL;
        lisp_cons* new_cons = cons;   // the '(' token's cell is reused for the first element
        bool cdr_mode = false;        // a '.' was seen: next element becomes the tail's cdr
        bool closed = false;          // the matching ')' has been consumed

        while (lisp_parse_recursive(tokens, fetch_tokens, &new_cons->car)) {
            new_cons->cdr.type = LISP_T_CONS;
            new_cons->cdr.value.cons = NULL;
            if (head == NULL)
                head = new_cons;
            if (tail == NULL)
                tail = head;
            else {
                if (cdr_mode) {
                    tail->cdr = new_cons->car;
                    dbg_free(new_cons);
                } else {
                    tail->cdr.value.cons = new_cons;
                    tail = new_cons;
                }
            }
            new_cons = dbg_malloc(sizeof(lisp_cons));

            while (*tokens == NULL) // fetch more tokens
                if ((*fetch_tokens)(tokens) == false) {
                    dbg_free(new_cons); // scratch cell is not linked anywhere yet
                    return false;
                }

            // peek at the next token for list punctuation
            token_lstr = (*tokens)->car.value.string;
            if (lisp_string_len(token_lstr) == 1) {
                char only = lisp_string_data(token_lstr)[0];
                if (only == ')') {
                    cons = lisp_cons_pop(tokens);
                    dbg_free(cons);
                    dbg_free(token_lstr);
                    closed = true;
                    break;
                } else if (only == '.') {
                    cons = lisp_cons_pop(tokens);
                    dbg_free(cons);
                    dbg_free(token_lstr);
                    cdr_mode = true;
                }
            }
        }

        // The loop also ends when an element was expected but a ')' was
        // found (e.g. "()"); the recursive call pushed that ')' back, so
        // consume it here or it would be left at the front of the stream.
        if (!closed && *tokens != NULL) {
            lisp_string* next_lstr = (*tokens)->car.value.string;
            if (lisp_string_len(next_lstr) == 1 && lisp_string_data(next_lstr)[0] == ')') {
                lisp_cons* closer = lisp_cons_pop(tokens);
                dbg_free(closer);
                dbg_free(next_lstr);
            }
        }

        dbg_free(new_cons); // scratch cell that never received an element
        value->type = LISP_T_CONS;
        value->value.cons = head;

        return true;
    }

    if (lisp_parse_scalar(token_lstr, value)) {
        dbg_free(cons);
        dbg_free(token_lstr);
        return true;
    }

    // not an expression start (a ')'): push the token back for the caller
    *tokens = cons;
    return false;
}
|