Squashed 'lispBM/lispBM/' changes from b6a136a2..4cf1df6f

4cf1df6f refactoring of the array-reader in preparation for future reader modifications to allow streaming of programs
6075bc8d adding some potentially dangerous omitted checks

git-subtree-dir: lispBM/lispBM
git-subtree-split: 4cf1df6f164d3a81188eb065e625455f12cbd869
This commit is contained in:
Benjamin Vedder 2022-09-05 20:06:04 +02:00
parent c0a44bd5ca
commit 4a62f04205
5 changed files with 172 additions and 135 deletions

View File

@ -178,6 +178,9 @@
#define SYM_DOT 0x76
#define SYM_QUOTE_IT 0x77
#define SYM_COLON 0x78
#define SYM_TOKENIZER_WAIT 0x79
#define SYM_OPENBRACK 0x80
#define SYM_CLOSEBRACK 0x81
// Fundamental Operations
#define FUNDAMENTALS_START 0x100

View File

@ -33,6 +33,11 @@ extern "C" {
/*! \page changelog Changelog
Set 5 2022: Version 0.6.0
- Refactoring of array-reader. Array reading is nolonger done monolithically
inside of the tokpar framework, but rather as a cooperation between the
evaluator and the tokenizer.
Sep 3 2022: Version 0.6.0
- Round-robin scheduling + Addition of an Atomic construct to use with care.

View File

@ -71,7 +71,8 @@
#define READ_COMMAAT_RESULT ((39 << LBM_VAL_SHIFT) | LBM_TYPE_U)
#define READ_COMMA_RESULT ((40 << LBM_VAL_SHIFT) | LBM_TYPE_U)
#define READ_TERMINATE_COLON ((41 << LBM_VAL_SHIFT) | LBM_TYPE_U)
#define READ_START_ARRAY ((42 << LBM_VAL_SHIFT) | LBM_TYPE_U)
#define READ_APPEND_ARRAY ((43 << LBM_VAL_SHIFT) | LBM_TYPE_U)
@ -1606,7 +1607,7 @@ static inline void cont_expand_macro(eval_context_t *ctx) {
lbm_uint* sptr = lbm_get_stack_ptr(&ctx->K, 2);
if (!sptr) {
error_ctx(ENC_SYM_TERROR);
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_value env = (lbm_value)sptr[0];
@ -1936,7 +1937,7 @@ static inline void cont_application(eval_context_t *ctx) {
lbm_uint *fun_args = lbm_get_stack_ptr(&ctx->K, arg_count+1);
if (fun_args == NULL) {
ctx->r = ENC_SYM_FATAL_ERROR;
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_value fun = fun_args[0];
@ -2298,6 +2299,10 @@ static inline void cont_read_next_token(eval_context_t *ctx) {
lbm_pop(&ctx->K, &stream);
lbm_stream_t *str = lbm_dec_stream(stream);
if (str == NULL || str->state == NULL) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state;
lbm_value tok = token_stream_get(str);
@ -2313,6 +2318,11 @@ static inline void cont_read_next_token(eval_context_t *ctx) {
error_ctx(ENC_SYM_MERROR);
done_reading(ctx->id);
return;
case SYM_TOKENIZER_WAIT:
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN));
ctx->app_cont = true;
yield_ctx(EVAL_CPS_MIN_SLEEP);
return;
case SYM_TOKENIZER_DONE:
/* Tokenizer reached "end of file"
The parser could be in a state where it needs
@ -2360,6 +2370,15 @@ static inline void cont_read_next_token(eval_context_t *ctx) {
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN));
ctx->app_cont = true;
break;
case SYM_CLOSEBRACK:
ctx->r = tok;
ctx->app_cont = true;
break;
case SYM_OPENBRACK:
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_START_ARRAY));
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN));
ctx->app_cont = true;
break;
case SYM_QUOTE_IT:
CHECK_STACK(lbm_push(&ctx->K, READ_QUOTE_RESULT));
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN));
@ -2398,6 +2417,124 @@ static inline void cont_read_next_token(eval_context_t *ctx) {
}
}
static inline void cont_read_start_array(eval_context_t *ctx) {
lbm_value stream;
lbm_pop(&ctx->K, &stream);
lbm_uint num_free = lbm_memory_longest_free();
lbm_uint initial_size = (lbm_uint)((float)num_free * 0.9);
if (initial_size == 0) {
gc(ENC_SYM_NIL, ENC_SYM_NIL);
num_free = lbm_memory_longest_free();
initial_size = (lbm_uint)((float)num_free * 0.9);
if (initial_size == 0) {
error_ctx(ENC_SYM_MERROR);
return;
}
}
if ((lbm_type_of(ctx->r) == LBM_TYPE_SYMBOL) &&
((ctx->r == ENC_SYM_TYPE_I32) ||
(ctx->r == ENC_SYM_TYPE_U32) ||
(ctx->r == ENC_SYM_TYPE_FLOAT) ||
(ctx->r == ENC_SYM_TYPE_CHAR))) {
lbm_type t;
switch (ctx->r) {
case ENC_SYM_TYPE_I32: t = LBM_TYPE_I32; break;
case ENC_SYM_TYPE_U32: t = LBM_TYPE_U32; break;
case ENC_SYM_TYPE_FLOAT: t = LBM_TYPE_FLOAT; break;
case ENC_SYM_TYPE_CHAR: t = LBM_TYPE_CHAR; break;
default:
error_ctx(ENC_SYM_TERROR);
return;
}
if (ctx->r == ENC_SYM_TYPE_CHAR) {
initial_size = sizeof(lbm_uint) * initial_size;
}
lbm_value array;
if (!lbm_heap_allocate_array(&array, initial_size, t)) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
CHECK_STACK(lbm_push_5(&ctx->K, array, lbm_enc_u(0), ctx->r, stream, READ_APPEND_ARRAY));
CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN));
ctx->app_cont = true;
} else if (lbm_is_number(ctx->r)) {
lbm_value array;
if (!lbm_heap_allocate_array(&array, initial_size, LBM_TYPE_CHAR)) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
CHECK_STACK(lbm_push_5(&ctx->K, array, lbm_enc_u(0), ENC_SYM_TYPE_CHAR, stream, READ_APPEND_ARRAY));
ctx->app_cont = true;
} else {
error_ctx(ENC_SYM_RERROR);
}
}
static inline void cont_read_append_array(eval_context_t *ctx) {
lbm_uint *sptr = lbm_get_stack_ptr(&ctx->K, 4);
if (!sptr) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_value array = sptr[0];
lbm_value ix = lbm_dec_as_u32(sptr[1]);
lbm_value type = sptr[2];
lbm_value stream = sptr[3];
lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(array); // TODO: Check
if (lbm_is_number(ctx->r)) {
switch(type) {
case ENC_SYM_TYPE_CHAR:
((uint8_t*)arr->data)[ix] = (uint8_t)lbm_dec_as_u32(ctx->r);
break;
case ENC_SYM_TYPE_I32:
((lbm_int*)arr->data)[ix] = lbm_dec_as_i32(ctx->r);
break;
case ENC_SYM_TYPE_U32:
((lbm_uint*)arr->data)[ix] = lbm_dec_as_u32(ctx->r);
break;
case ENC_SYM_TYPE_FLOAT: {
float f = lbm_dec_as_float(ctx->r);
memcpy(&arr->data[ix], (uint32_t*)&f, sizeof(float));
} break;
default:
error_ctx(ENC_SYM_TERROR);
return;
}
sptr[1] = lbm_enc_u(ix + 1);
CHECK_STACK(lbm_push_3(&ctx->K, READ_APPEND_ARRAY, stream, READ_NEXT_TOKEN));
ctx->app_cont = true;
} else if (lbm_is_symbol(ctx->r) && lbm_dec_sym(ctx->r) == SYM_CLOSEBRACK) {
lbm_uint array_size = ix;
if (type == ENC_SYM_TYPE_CHAR) {
if (array_size % 4) {
array_size = (array_size / 4) + 1;
} else {
array_size = array_size / 4;
}
}
lbm_memory_shrink((lbm_uint*)arr->data, array_size);
arr->size = ix;
lbm_stack_drop(&ctx->K, 4);
ctx->r = array;
ctx->app_cont = true;
} else {
error_ctx(ENC_SYM_TERROR);
}
}
static inline void cont_read_append_continue(eval_context_t *ctx) {
lbm_value *sptr = lbm_get_stack_ptr(&ctx->K, 3);
@ -2453,6 +2590,10 @@ static inline void cont_read_expect_closepar(eval_context_t *ctx) {
lbm_pop_2(&ctx->K, &res, &stream);
lbm_stream_t *str = lbm_dec_stream(stream);
if (str == NULL || str->state == NULL) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state;
if (lbm_type_of(ctx->r) == LBM_TYPE_SYMBOL &&
@ -2479,6 +2620,10 @@ static inline void cont_read_dot_terminate(eval_context_t *ctx) {
lbm_value stream = sptr[2];
lbm_stream_t *str = lbm_dec_stream(stream);
if (str == NULL || str->state == NULL) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state;
lbm_stack_drop(&ctx->K ,3);
@ -2516,6 +2661,10 @@ static inline void cont_read_done(eval_context_t *ctx) {
lbm_pop_2(&ctx->K, &stream, &prg_tag);
lbm_stream_t *str = lbm_dec_stream(stream);
if (str == NULL || str->state == NULL) {
error_ctx(ENC_SYM_FATAL_ERROR);
return;
}
lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state;
lbm_value tok = token_stream_get(str);
@ -2725,6 +2874,8 @@ static void evaluation_step(void){
case READ_COMMAAT_RESULT: cont_read_commaat_result(ctx); return;
case READ_COMMA_RESULT: cont_read_comma_result(ctx); return;
case READ_TERMINATE_COLON: cont_read_terminate_colon(ctx); return;
case READ_START_ARRAY: cont_read_start_array(ctx); return;
case READ_APPEND_ARRAY: cont_read_append_array(ctx); return;
default:
error_ctx(ENC_SYM_EERROR);
return;
@ -2894,7 +3045,7 @@ void lbm_run_eval(void){
eval_context_t *next_to_run = NULL;
if (eval_steps_quota <= 0 || !ctx_running) {
uint32_t us;
uint32_t us = EVAL_CPS_MIN_SLEEP;
if (is_atomic) {
if (ctx_running) {

View File

@ -106,6 +106,9 @@ special_sym const special_symbols[] = {
{"sym_tok_done" , SYM_TOKENIZER_DONE},
{"sym_quote_it" , SYM_QUOTE_IT},
{"sym_colon" , SYM_COLON},
{"sym_tok_wait" , SYM_TOKENIZER_WAIT},
{"sym_openbrack" , SYM_OPENBRACK},
{"sym_closebrack" , SYM_CLOSEBRACK},
// special symbols with parseable names
{"type-list" , SYM_TYPE_LIST},

View File

@ -452,120 +452,6 @@ int tok_integer(lbm_tokenizer_char_stream_t *str, token_int *result ) {
return 0;
}
bool parse_array(lbm_tokenizer_char_stream_t *str, lbm_uint initial_size, lbm_value *res) {
lbm_type t = LBM_TYPE_BYTE; // default
int n = 0;
clean_whitespace(str);
if (!more(str)) {
return false;
}
n = tok_symbol(str);
if (n > 0) {
if (strncmp(sym_str, "type-i32", (uint32_t)n) == 0) {
t = LBM_TYPE_I32;
} else if (strncmp(sym_str, "type-u32", (uint32_t)n) == 0) {
t = LBM_TYPE_U32;
} else if (strncmp(sym_str, "type-float", (uint32_t)n) == 0) {
t = LBM_TYPE_FLOAT;
} else if (strncmp(sym_str, "type-byte", (uint32_t)n) == 0) {
t = LBM_TYPE_BYTE;
initial_size = sizeof(lbm_uint) * initial_size;
}
} else {
t = LBM_TYPE_BYTE;
initial_size = sizeof(lbm_uint) * initial_size;
}
lbm_value array;
if (!lbm_heap_allocate_array(&array, initial_size, t)) {
return false;
}
lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(array);
bool done = false;
lbm_uint ix = 0;
while (!done) {
clean_whitespace(str);
if (!more(str)) {
lbm_heap_explicit_free_array(array);
return false;
}
switch(tok_match_fixed_size_tokens(str)) {
case TOKCLOSEBRACK:
done = true;
break;
case NOTOKEN:
break;
default:
lbm_heap_explicit_free_array(array);
return false;
}
n = 0;
//float f_val;
token_int i_val;
token_float f_val;
if (!done) {
switch (t) {
case LBM_TYPE_BYTE:
n = tok_integer(str, &i_val);
if (n) ((uint8_t*)arr->data)[ix] = (uint8_t)(i_val.negative ? -i_val.value : i_val.value);
break;
case LBM_TYPE_I32:
n = tok_integer(str, &i_val);
if (n) arr->data[ix] = (uint32_t)(i_val.negative ? -i_val.value : i_val.value);
break;
case LBM_TYPE_U32:
n = tok_integer(str, &i_val);
if (n) arr->data[ix] = (uint32_t)(i_val.negative ? -i_val.value : i_val.value);
break;
case LBM_TYPE_FLOAT: {
float f = 0;
n = tok_D(str, &f_val);
if (n == 0) {
n = tok_integer(str, &i_val);
f = (float)i_val.value;
} else {
f = (float)f_val.value;
}
if (n) memcpy(&arr->data[ix], (uint32_t*)&f, sizeof(float));
}break;
}
if (n == 0) {
lbm_heap_explicit_free_array(array);
return false;
}
}
ix++;
}
lbm_uint array_size = ix - 1;
// Calculate array size in number of words
if (t == LBM_TYPE_BYTE) {
if (array_size % 4) {
array_size = (array_size / 4) + 1;
} else {
array_size = array_size / 4;
}
}
lbm_memory_shrink((lbm_uint*)arr->data, array_size);
arr->size = ix - 1;
*res = array;
return true;
}
lbm_value lbm_get_next_token(lbm_tokenizer_char_stream_t *str) {
char c_val;
@ -645,24 +531,13 @@ lbm_value lbm_get_next_token(lbm_tokenizer_char_stream_t *str) {
case TOKMATCHANY:
res = lbm_enc_sym(SYM_MATCH_ANY);
break;
case TOKOPENBRACK: {
lbm_uint num_free = lbm_memory_longest_free();
lbm_uint initial_size = (lbm_uint)((float)num_free * 0.9);
if (initial_size == 0) {
res = lbm_enc_sym(SYM_MERROR);
break;
}
lbm_value array;
if (parse_array(str, initial_size, &array)) {
res = array;
} else {
res = lbm_enc_sym(SYM_RERROR);
}
} break;
case TOKOPENBRACK:
res = lbm_enc_sym(SYM_OPENBRACK);
break;
case TOKCLOSEBRACK:
res = lbm_enc_sym(SYM_RERROR); // a closing bracket without matching open.
res = lbm_enc_sym(SYM_CLOSEBRACK);
break;
//res = lbm_enc_sym(SYM_RERROR); // a closing bracket without matching open.
default:
break;
}