From 4a62f042054b7247cdabedd504fe21b331c10b40 Mon Sep 17 00:00:00 2001 From: Benjamin Vedder Date: Mon, 5 Sep 2022 20:06:04 +0200 Subject: [PATCH] Squashed 'lispBM/lispBM/' changes from b6a136a2..4cf1df6f 4cf1df6f refactoring of the array-reader in preparation for future reader modifications to allow streaming of programs 6075bc8d adding some potentially dangerous omitted checks git-subtree-dir: lispBM/lispBM git-subtree-split: 4cf1df6f164d3a81188eb065e625455f12cbd869 --- include/lbm_defines.h | 3 + include/lbm_version.h | 5 ++ src/eval_cps.c | 159 ++++++++++++++++++++++++++++++++++++++++-- src/symrepr.c | 3 + src/tokpar.c | 137 ++---------------------------------- 5 files changed, 172 insertions(+), 135 deletions(-) diff --git a/include/lbm_defines.h b/include/lbm_defines.h index 1557ba95..94deb82b 100644 --- a/include/lbm_defines.h +++ b/include/lbm_defines.h @@ -178,6 +178,9 @@ #define SYM_DOT 0x76 #define SYM_QUOTE_IT 0x77 #define SYM_COLON 0x78 +#define SYM_TOKENIZER_WAIT 0x79 +#define SYM_OPENBRACK 0x80 +#define SYM_CLOSEBRACK 0x81 // Fundamental Operations #define FUNDAMENTALS_START 0x100 diff --git a/include/lbm_version.h b/include/lbm_version.h index d6437f67..74cc6392 100644 --- a/include/lbm_version.h +++ b/include/lbm_version.h @@ -33,6 +33,11 @@ extern "C" { /*! \page changelog Changelog +Sep 5 2022: Version 0.6.0 + - Refactoring of array-reader. Array reading is no longer done monolithically + inside of the tokpar framework, but rather as a cooperation between the + evaluator and the tokenizer. + Sep 3 2022: Version 0.6.0 - Round-robin scheduling + Addition of an Atomic construct to use with care. 
diff --git a/src/eval_cps.c b/src/eval_cps.c index 1e6e2ad8..3eeab2c2 100644 --- a/src/eval_cps.c +++ b/src/eval_cps.c @@ -71,7 +71,8 @@ #define READ_COMMAAT_RESULT ((39 << LBM_VAL_SHIFT) | LBM_TYPE_U) #define READ_COMMA_RESULT ((40 << LBM_VAL_SHIFT) | LBM_TYPE_U) #define READ_TERMINATE_COLON ((41 << LBM_VAL_SHIFT) | LBM_TYPE_U) - +#define READ_START_ARRAY ((42 << LBM_VAL_SHIFT) | LBM_TYPE_U) +#define READ_APPEND_ARRAY ((43 << LBM_VAL_SHIFT) | LBM_TYPE_U) @@ -1606,7 +1607,7 @@ static inline void cont_expand_macro(eval_context_t *ctx) { lbm_uint* sptr = lbm_get_stack_ptr(&ctx->K, 2); if (!sptr) { - error_ctx(ENC_SYM_TERROR); + error_ctx(ENC_SYM_FATAL_ERROR); return; } lbm_value env = (lbm_value)sptr[0]; @@ -1936,7 +1937,7 @@ static inline void cont_application(eval_context_t *ctx) { lbm_uint *fun_args = lbm_get_stack_ptr(&ctx->K, arg_count+1); if (fun_args == NULL) { - ctx->r = ENC_SYM_FATAL_ERROR; + error_ctx(ENC_SYM_FATAL_ERROR); return; } lbm_value fun = fun_args[0]; @@ -2298,6 +2299,10 @@ static inline void cont_read_next_token(eval_context_t *ctx) { lbm_pop(&ctx->K, &stream); lbm_stream_t *str = lbm_dec_stream(stream); + if (str == NULL || str->state == NULL) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state; lbm_value tok = token_stream_get(str); @@ -2313,6 +2318,11 @@ static inline void cont_read_next_token(eval_context_t *ctx) { error_ctx(ENC_SYM_MERROR); done_reading(ctx->id); return; + case SYM_TOKENIZER_WAIT: + CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN)); + ctx->app_cont = true; + yield_ctx(EVAL_CPS_MIN_SLEEP); + return; case SYM_TOKENIZER_DONE: /* Tokenizer reached "end of file" The parser could be in a state where it needs @@ -2360,6 +2370,15 @@ static inline void cont_read_next_token(eval_context_t *ctx) { CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN)); ctx->app_cont = true; break; + case SYM_CLOSEBRACK: + ctx->r = tok; + ctx->app_cont = true; + break; + 
case SYM_OPENBRACK: + CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_START_ARRAY)); + CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN)); + ctx->app_cont = true; + break; case SYM_QUOTE_IT: CHECK_STACK(lbm_push(&ctx->K, READ_QUOTE_RESULT)); CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN)); @@ -2398,6 +2417,124 @@ static inline void cont_read_next_token(eval_context_t *ctx) { } } +static inline void cont_read_start_array(eval_context_t *ctx) { + + lbm_value stream; + + lbm_pop(&ctx->K, &stream); + + lbm_uint num_free = lbm_memory_longest_free(); + lbm_uint initial_size = (lbm_uint)((float)num_free * 0.9); + if (initial_size == 0) { + gc(ENC_SYM_NIL, ENC_SYM_NIL); + num_free = lbm_memory_longest_free(); + initial_size = (lbm_uint)((float)num_free * 0.9); + if (initial_size == 0) { + error_ctx(ENC_SYM_MERROR); + return; + } + } + + if ((lbm_type_of(ctx->r) == LBM_TYPE_SYMBOL) && + ((ctx->r == ENC_SYM_TYPE_I32) || + (ctx->r == ENC_SYM_TYPE_U32) || + (ctx->r == ENC_SYM_TYPE_FLOAT) || + (ctx->r == ENC_SYM_TYPE_CHAR))) { + + lbm_type t; + switch (ctx->r) { + case ENC_SYM_TYPE_I32: t = LBM_TYPE_I32; break; + case ENC_SYM_TYPE_U32: t = LBM_TYPE_U32; break; + case ENC_SYM_TYPE_FLOAT: t = LBM_TYPE_FLOAT; break; + case ENC_SYM_TYPE_CHAR: t = LBM_TYPE_CHAR; break; + default: + error_ctx(ENC_SYM_TERROR); + return; + } + + if (ctx->r == ENC_SYM_TYPE_CHAR) { + initial_size = sizeof(lbm_uint) * initial_size; + } + + lbm_value array; + if (!lbm_heap_allocate_array(&array, initial_size, t)) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } + + CHECK_STACK(lbm_push_5(&ctx->K, array, lbm_enc_u(0), ctx->r, stream, READ_APPEND_ARRAY)); + CHECK_STACK(lbm_push_2(&ctx->K, stream, READ_NEXT_TOKEN)); + ctx->app_cont = true; + } else if (lbm_is_number(ctx->r)) { + lbm_value array; + if (!lbm_heap_allocate_array(&array, initial_size, LBM_TYPE_CHAR)) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } + + CHECK_STACK(lbm_push_5(&ctx->K, array, lbm_enc_u(0), ENC_SYM_TYPE_CHAR, stream, 
READ_APPEND_ARRAY)); + ctx->app_cont = true; + } else { + error_ctx(ENC_SYM_RERROR); + } +} + +static inline void cont_read_append_array(eval_context_t *ctx) { + + lbm_uint *sptr = lbm_get_stack_ptr(&ctx->K, 4); + if (!sptr) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } + + lbm_value array = sptr[0]; + lbm_value ix = lbm_dec_as_u32(sptr[1]); + lbm_value type = sptr[2]; + lbm_value stream = sptr[3]; + + lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(array); // TODO: Check + + if (lbm_is_number(ctx->r)) { + switch(type) { + case ENC_SYM_TYPE_CHAR: + ((uint8_t*)arr->data)[ix] = (uint8_t)lbm_dec_as_u32(ctx->r); + break; + case ENC_SYM_TYPE_I32: + ((lbm_int*)arr->data)[ix] = lbm_dec_as_i32(ctx->r); + break; + case ENC_SYM_TYPE_U32: + ((lbm_uint*)arr->data)[ix] = lbm_dec_as_u32(ctx->r); + break; + case ENC_SYM_TYPE_FLOAT: { + float f = lbm_dec_as_float(ctx->r); + memcpy(&arr->data[ix], (uint32_t*)&f, sizeof(float)); + } break; + default: + error_ctx(ENC_SYM_TERROR); + return; + } + sptr[1] = lbm_enc_u(ix + 1); + CHECK_STACK(lbm_push_3(&ctx->K, READ_APPEND_ARRAY, stream, READ_NEXT_TOKEN)); + ctx->app_cont = true; + } else if (lbm_is_symbol(ctx->r) && lbm_dec_sym(ctx->r) == SYM_CLOSEBRACK) { + lbm_uint array_size = ix; + if (type == ENC_SYM_TYPE_CHAR) { + if (array_size % 4) { + array_size = (array_size / 4) + 1; + } else { + array_size = array_size / 4; + } + } + lbm_memory_shrink((lbm_uint*)arr->data, array_size); + arr->size = ix; + lbm_stack_drop(&ctx->K, 4); + ctx->r = array; + ctx->app_cont = true; + } else { + error_ctx(ENC_SYM_TERROR); + } +} + static inline void cont_read_append_continue(eval_context_t *ctx) { lbm_value *sptr = lbm_get_stack_ptr(&ctx->K, 3); @@ -2453,6 +2590,10 @@ static inline void cont_read_expect_closepar(eval_context_t *ctx) { lbm_pop_2(&ctx->K, &res, &stream); lbm_stream_t *str = lbm_dec_stream(stream); + if (str == NULL || str->state == NULL) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } lbm_tokenizer_char_stream_t *s = 
(lbm_tokenizer_char_stream_t*)str->state; if (lbm_type_of(ctx->r) == LBM_TYPE_SYMBOL && @@ -2479,6 +2620,10 @@ static inline void cont_read_dot_terminate(eval_context_t *ctx) { lbm_value stream = sptr[2]; lbm_stream_t *str = lbm_dec_stream(stream); + if (str == NULL || str->state == NULL) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state; lbm_stack_drop(&ctx->K ,3); @@ -2516,6 +2661,10 @@ static inline void cont_read_done(eval_context_t *ctx) { lbm_pop_2(&ctx->K, &stream, &prg_tag); lbm_stream_t *str = lbm_dec_stream(stream); + if (str == NULL || str->state == NULL) { + error_ctx(ENC_SYM_FATAL_ERROR); + return; + } lbm_tokenizer_char_stream_t *s = (lbm_tokenizer_char_stream_t*)str->state; lbm_value tok = token_stream_get(str); @@ -2725,6 +2874,8 @@ static void evaluation_step(void){ case READ_COMMAAT_RESULT: cont_read_commaat_result(ctx); return; case READ_COMMA_RESULT: cont_read_comma_result(ctx); return; case READ_TERMINATE_COLON: cont_read_terminate_colon(ctx); return; + case READ_START_ARRAY: cont_read_start_array(ctx); return; + case READ_APPEND_ARRAY: cont_read_append_array(ctx); return; default: error_ctx(ENC_SYM_EERROR); return; @@ -2894,7 +3045,7 @@ void lbm_run_eval(void){ eval_context_t *next_to_run = NULL; if (eval_steps_quota <= 0 || !ctx_running) { - uint32_t us; + uint32_t us = EVAL_CPS_MIN_SLEEP; if (is_atomic) { if (ctx_running) { diff --git a/src/symrepr.c b/src/symrepr.c index 9a621ee1..76a1acfa 100644 --- a/src/symrepr.c +++ b/src/symrepr.c @@ -106,6 +106,9 @@ special_sym const special_symbols[] = { {"sym_tok_done" , SYM_TOKENIZER_DONE}, {"sym_quote_it" , SYM_QUOTE_IT}, {"sym_colon" , SYM_COLON}, + {"sym_tok_wait" , SYM_TOKENIZER_WAIT}, + {"sym_openbrack" , SYM_OPENBRACK}, + {"sym_closebrack" , SYM_CLOSEBRACK}, // special symbols with parseable names {"type-list" , SYM_TYPE_LIST}, diff --git a/src/tokpar.c b/src/tokpar.c index e694f0bc..1e9868b9 100644 --- a/src/tokpar.c 
+++ b/src/tokpar.c @@ -452,120 +452,6 @@ int tok_integer(lbm_tokenizer_char_stream_t *str, token_int *result ) { return 0; } - -bool parse_array(lbm_tokenizer_char_stream_t *str, lbm_uint initial_size, lbm_value *res) { - - lbm_type t = LBM_TYPE_BYTE; // default - - int n = 0; - clean_whitespace(str); - if (!more(str)) { - return false; - } - - n = tok_symbol(str); - - if (n > 0) { - if (strncmp(sym_str, "type-i32", (uint32_t)n) == 0) { - t = LBM_TYPE_I32; - } else if (strncmp(sym_str, "type-u32", (uint32_t)n) == 0) { - t = LBM_TYPE_U32; - } else if (strncmp(sym_str, "type-float", (uint32_t)n) == 0) { - t = LBM_TYPE_FLOAT; - } else if (strncmp(sym_str, "type-byte", (uint32_t)n) == 0) { - t = LBM_TYPE_BYTE; - initial_size = sizeof(lbm_uint) * initial_size; - } - } else { - t = LBM_TYPE_BYTE; - initial_size = sizeof(lbm_uint) * initial_size; - } - - lbm_value array; - if (!lbm_heap_allocate_array(&array, initial_size, t)) { - return false; - } - lbm_array_header_t *arr = (lbm_array_header_t*)lbm_car(array); - - bool done = false; - - lbm_uint ix = 0; - - while (!done) { - clean_whitespace(str); - if (!more(str)) { - lbm_heap_explicit_free_array(array); - return false; - } - - switch(tok_match_fixed_size_tokens(str)) { - case TOKCLOSEBRACK: - done = true; - break; - case NOTOKEN: - break; - default: - lbm_heap_explicit_free_array(array); - return false; - } - - n = 0; - //float f_val; - - token_int i_val; - token_float f_val; - - if (!done) { - switch (t) { - case LBM_TYPE_BYTE: - n = tok_integer(str, &i_val); - if (n) ((uint8_t*)arr->data)[ix] = (uint8_t)(i_val.negative ? -i_val.value : i_val.value); - break; - case LBM_TYPE_I32: - n = tok_integer(str, &i_val); - if (n) arr->data[ix] = (uint32_t)(i_val.negative ? -i_val.value : i_val.value); - break; - case LBM_TYPE_U32: - n = tok_integer(str, &i_val); - if (n) arr->data[ix] = (uint32_t)(i_val.negative ? 
-i_val.value : i_val.value); - break; - case LBM_TYPE_FLOAT: { - float f = 0; - n = tok_D(str, &f_val); - if (n == 0) { - n = tok_integer(str, &i_val); - f = (float)i_val.value; - } else { - f = (float)f_val.value; - } - if (n) memcpy(&arr->data[ix], (uint32_t*)&f, sizeof(float)); - }break; - } - if (n == 0) { - lbm_heap_explicit_free_array(array); - return false; - } - } - ix++; - } - - lbm_uint array_size = ix - 1; - - // Calculate array size in number of words - if (t == LBM_TYPE_BYTE) { - if (array_size % 4) { - array_size = (array_size / 4) + 1; - } else { - array_size = array_size / 4; - } - } - - lbm_memory_shrink((lbm_uint*)arr->data, array_size); - arr->size = ix - 1; - *res = array; - return true; -} - lbm_value lbm_get_next_token(lbm_tokenizer_char_stream_t *str) { char c_val; @@ -645,24 +531,13 @@ lbm_value lbm_get_next_token(lbm_tokenizer_char_stream_t *str) { case TOKMATCHANY: res = lbm_enc_sym(SYM_MATCH_ANY); break; - case TOKOPENBRACK: { - lbm_uint num_free = lbm_memory_longest_free(); - lbm_uint initial_size = (lbm_uint)((float)num_free * 0.9); - - if (initial_size == 0) { - res = lbm_enc_sym(SYM_MERROR); - break; - } - - lbm_value array; - if (parse_array(str, initial_size, &array)) { - res = array; - } else { - res = lbm_enc_sym(SYM_RERROR); - } - } break; + case TOKOPENBRACK: + res = lbm_enc_sym(SYM_OPENBRACK); + break; case TOKCLOSEBRACK: - res = lbm_enc_sym(SYM_RERROR); // a closing bracket without matching open. + res = lbm_enc_sym(SYM_CLOSEBRACK); + break; + //res = lbm_enc_sym(SYM_RERROR); // a closing bracket without matching open. default: break; }