#include "parse.h" #define lisp_tokenize_len 64 char lisp_tokenize_buf[lisp_tokenize_len]; int lisp_tokenize_pos; bool lisp_tokenize_quote_mode; bool lisp_tokenize_esc_mode; void lisp_tokenize_init() { lisp_tokenize_buf[0] = '\0'; lisp_tokenize_pos = 0; lisp_tokenize_quote_mode = false; lisp_tokenize_esc_mode = false; } void lisp_tokenize_next(lisp_cons** head) { lisp_tokenize_buf[lisp_tokenize_pos] = '\0'; lisp_string* token_lstr = lisp_string_create(lisp_tokenize_buf); lisp_tokenize_buf[0] = '\0'; lisp_tokenize_pos = 0; lisp_cons* tail = dbg_malloc(sizeof(lisp_cons)); tail->car.type = LISP_T_STRING; tail->car.value.string = token_lstr; tail->cdr.type = LISP_T_CONS; tail->cdr.value.cons = NULL; if (*head != NULL) lisp_cons_join(*head, tail); else *head = tail; } void lisp_tokenize(char* buf, int len, lisp_cons** head) { for (int i = 0; i < len; i++) { if (lisp_tokenize_pos >= lisp_tokenize_len-1) { lisp_tokenize_pos = lisp_tokenize_len-1; lisp_tokenize_next(head); } char c = buf[i]; if (lisp_tokenize_quote_mode) { lisp_tokenize_buf[lisp_tokenize_pos++] = c; if (c == '\\') lisp_tokenize_esc_mode = true; else if ((c == '"') & !lisp_tokenize_esc_mode) { // end of string lisp_tokenize_next(head); lisp_tokenize_quote_mode = false; } else lisp_tokenize_esc_mode = false; continue; } if (lisp_tokenize_esc_mode) { lisp_tokenize_buf[lisp_tokenize_pos++] = c; lisp_tokenize_esc_mode = false; continue; } if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\v') || (c == '\f')) { if (lisp_tokenize_pos > 0) // whitespace, end of token lisp_tokenize_next(head); } else if (c == ')') { // end current token if (lisp_tokenize_pos > 0) lisp_tokenize_next(head); // ) is its own token lisp_tokenize_buf[lisp_tokenize_pos++] = c; lisp_tokenize_next(head); } else if (c == '(') { // ( is its own token lisp_tokenize_buf[lisp_tokenize_pos++] = c; lisp_tokenize_next(head); } else if ((c == '"') && (lisp_tokenize_pos == 0)) { // enable quotes mode lisp_tokenize_buf[lisp_tokenize_pos++] = c; lisp_tokenize_quote_mode = true; } else if (c == '\\') { lisp_tokenize_esc_mode = true; } else { lisp_tokenize_buf[lisp_tokenize_pos++] = c; } } } bool lisp_parse_number(lisp_string* token_lstr, lisp_value* value) { int token_len = lisp_string_len(token_lstr); char* token_data = lisp_string_data(token_lstr); bool neg = false; bool exp_neg = false; bool exp_mode = false; bool point = false; bool valid = false; long _int = 0; float _float = 0.0f; float weight = 0.1f; int exp_i = 0; int exp = 0; int base = 10; for (int i = 0; i < token_len; i++) { char c = token_data[i]; if (c >= 'a') c = c - ('a' - 'A'); if (!exp_mode) { if (c == '-' && i == 0) { neg = true; } else if (c >= '0' && c <= '9') { valid = true; if (!point) _int = (_int * base) + (c - '0'); else { _float += ((c - '0') * weight); weight /= 10.0f; } } else if (c >= 'A' && c <= 'F' && base == 16) { valid = true; _int = (_int * base) + (c - ('A' - 10)); } else if (c == 'X') { base = 16; } else if (c == '.' && !point) { point = true; _float = (float) _int; } else if (c == 'E') { valid = false; exp_mode = true; if (!point) _float = (float) _int; point = true; exp_i = i+1; } else { valid = false; break; } } else { if (c == '-' && i == exp_i) { exp_neg = true; } else if (c >= '0' && c <= '9') { valid = true; exp = (exp * 10) + (c - '0'); } else { valid = false; break; } } } if (valid && !point) { if (neg) _int = -_int; value->type = LISP_T_INT; value->value._int = _int; return true; } else if (valid) { if (neg) _float = -_float; if (exp_neg) exp = -exp; value->type = LISP_T_FLOAT; value->value._float = _float; if (exp_mode) value->value._float *= powf(10, exp); return true; } return false; } bool lisp_parse_string(lisp_string* token_lstr, lisp_value* value) { int token_len = lisp_string_len(token_lstr); char* token_data = lisp_string_data(token_lstr); if (token_data[0] != '"') return false; lisp_string* lstr = lisp_string_alloc(lisp_string_len(token_lstr) - 2); int lstr_len = 0; char* lstr_data = lisp_string_data(lstr); bool esc_mode = false; char esc_hex_mode = 0; char esc_hex = 0; for (int i = 1; i < token_len-1; i++) { char c = token_data[i]; if (esc_mode) { esc_mode = false; switch (c) { case 'e': lstr_data[lstr_len++] = '\e'; break; case 'r': lstr_data[lstr_len++] = '\r'; break; case 'n': lstr_data[lstr_len++] = '\n'; break; case 't': lstr_data[lstr_len++] = '\t'; break; case 'f': lstr_data[lstr_len++] = '\f'; break; case 'v': lstr_data[lstr_len++] = '\v'; break; case 'x': esc_hex_mode = 2; esc_hex = 0; break; default: lstr_data[lstr_len++] = c; break; } } else if (esc_hex_mode > 0) { esc_hex_mode--; esc_hex <<= 4; if (c <= '9') esc_hex += (c - '0'); else if (c <= 'F') esc_hex += (c - ('A'-10)); else esc_hex += (c - ('a'-10)); if (esc_hex_mode == 0) lstr_data[lstr_len++] = esc_hex; } else { if (c == '\\') { esc_mode = true; } else { lstr_data[lstr_len++] = c; } } } *((int*) lstr) = lstr_len; value->type = LISP_T_STRING; value->value.string = lstr; return true; } bool lisp_parse_scalar(lisp_string* token_lstr, lisp_value* value) { char* buf = lisp_string_data(token_lstr); if (buf[0] == '(' || buf[0] == ')' || buf[0] == '\'') // parentheses, quote return false; if (lisp_string_len(token_lstr) == 3 && buf[0] == 'n' && buf[1] == 'i' && buf[2] == 'l') { // nil value->type = LISP_T_CONS; value->value.cons = NULL; return true; } if (lisp_parse_string(token_lstr, value)) return true; if (lisp_parse_number(token_lstr, value)) return true; lisp_atom atom = lisp_atomize(token_lstr); value->type = LISP_T_ATOM; value->value.atom = atom; return true; } bool lisp_parse_recursive(lisp_cons** tokens, bool (*fetch_tokens)(lisp_cons**), lisp_value* value) { while (*tokens == NULL) // fetch more tokens if ((*fetch_tokens)(tokens) == false) return false; lisp_cons* cons = lisp_cons_pop(tokens); lisp_string* token_lstr = cons->car.value.string; char* buf = lisp_string_data(token_lstr); if (buf[0] == '\'') { lisp_string* new_lstr = lisp_string_create_raw(lisp_string_data(token_lstr)+1, lisp_string_len(token_lstr)-1); dbg_free(token_lstr); cons->car.value.string = new_lstr; *tokens = cons; lisp_cons* quote_cons = dbg_malloc(sizeof(lisp_cons)); quote_cons->car.type = LISP_T_ATOM; quote_cons->car.value.atom = atom_quote; quote_cons->cdr.type = LISP_T_CONS; quote_cons->cdr.value.cons = dbg_malloc(sizeof(lisp_cons)); quote_cons->cdr.value.cons->cdr.type = LISP_T_CONS; quote_cons->cdr.value.cons->cdr.value.cons = NULL; if (!lisp_parse_recursive(tokens, fetch_tokens, "e_cons->cdr.value.cons->car)) return false; value->type = LISP_T_CONS; value->value.cons = quote_cons; return true; } if (buf[0] == '(') { buf = NULL; dbg_free(token_lstr); lisp_cons* head = NULL; lisp_cons* tail = NULL; lisp_cons* new_cons = cons; bool cdr_mode = false; while (lisp_parse_recursive(tokens, fetch_tokens, &new_cons->car)) { new_cons->cdr.type = LISP_T_CONS; new_cons->cdr.value.cons = NULL; if (head == NULL) head = new_cons; if (tail == NULL) tail = head; else { if (cdr_mode) { tail->cdr = new_cons->car; dbg_free(new_cons); } else { tail->cdr.value.cons = new_cons; tail = new_cons; } } new_cons = dbg_malloc(sizeof(lisp_cons)); while (*tokens == NULL) // fetch more tokens if ((*fetch_tokens)(tokens) == false) return false; token_lstr = (*tokens)->car.value.string; if (lisp_string_len(token_lstr) == 1) { if (lisp_string_data(token_lstr)[0] == ')') { cons = lisp_cons_pop(tokens); dbg_free(cons); dbg_free(token_lstr); break; } else if (lisp_string_data(token_lstr)[0] == '.') { cons = lisp_cons_pop(tokens); dbg_free(cons); dbg_free(token_lstr); cdr_mode = true; } } } dbg_free(new_cons); value->type = LISP_T_CONS; value->value.cons = head; return true; } if (lisp_parse_scalar(token_lstr, value)) { dbg_free(cons); dbg_free(token_lstr); return true; } *tokens = cons; return false; }