/*
Copyright 2018 , 2022 Joel Svensson svenssonjoel@yahoo.se
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file heap.h */
#ifndef HEAP_H_
#define HEAP_H_
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "lbm_types.h"
#include "symrepr.h"
#include "streams.h"
#include "stack.h"
/*
Planning for a more space efficient heap representation.
TODO: Need to find a good reference to read up on this.
- List based heap
- Easy to implement and somewhat efficient
0000 0000 Size Free bits
003F FFFF 4MB 10
007F FFFF 8MB 9
00FF FFFF 16MB 8
01FF FFFF 32MB 7
03FF FFFF 64MB 6 * Kind of heap size I am looking for
07FF FFFF 128MB 5
0FFF FFFF 256MB 4
1FFF FFFF 512MB 3
--- May 9 2021 ---
Actually now I am much more interested in way smaller memories ;)
0000 0000 Size Free bits
0000 0FFF 4KB 20 |
0000 1FFF 8KB 19 |
0000 3FFF 16KB 18 |
0000 7FFF 32KB 17 |
0000 FFFF 64KB 16 |
0001 FFFF 128KB 15 |
0003 FFFF 256KB 14 | - This range is very interesting.
0007 FFFF 512KB 13
000F FFFF 1MB 12
001F FFFF 2MB 11
003F FFFF 4MB 10
007F FFFF 8MB 9
00FF FFFF 16MB 8
01FF FFFF 32MB 7
03FF FFFF 64MB 6
07FF FFFF 128MB 5
0FFF FFFF 256MB 4
1FFF FFFF 512MB 3
Those are the kind of platforms that are fun... so a bunch of
wasted bits in heap pointers if we run on small MCUs.
-----------------
it is also the case that not all addresses will be used if all "cells" are
of the same size, 8 bytes...
value 0: 0000 0000
value 1: 0000 0008
value 2: 0000 0010
value 3: 0000 0018
Means bits 0,1,2 will always be empty in a valid address.
Cons cells also need to have room for 2 pointers, so each cell allocated from
memory should be 8 bytes.
Things that needs to be represented within these bits:
- GC MARK one per cell
- TYPE: type of CAR and type of cons
Types I would want:
- Full 32bit integer. Does not leave room for identification of type
- Float values. Same problem
Free bits in pointers 64MB heap:
31 30 29 28 27 26 2 1 0
0 0 0 0 0 0 XX XXXX XXXX XXXX XXXX XXXX X 0 0 0
Information needed for each cell:
Meaning | bits total | bits per car | bits per cdr
GC mark | 2 | 1 | 1 - only one of them will be used (the other is wasted)
Type | 2x | x | x
Ptr/!ptr | 2 | 1 | 1
Types (unboxed):
- Symbols
- 28bit integer ( will need signed shift right functionality )
- 28bit unsigned integer
- Character
If four types is all that should be possible (unboxed). then 2 bits are needed to differentiate.
2 + 1 + 1 = 4 => 28bits for data.
bit 0: ptr/!ptr
bit 1: gc
bit 2-3: type (if not ptr)
bit 3 - 25 ptr (if ptr)
bit 4 - 31 value (if value)
An unboxed value can occupy a car or cdr field in a cons cell.
types (boxed) extra information in pointer to cell can contain information
- 32 bit integer
- 32 bit unsigned integer
- 32 bit float
boxed representation:
[ptr| cdr]
|
[Value | Aux + GC_MARK]
Kinds of pointers:
- Pointer to cons cell.
- Pointer to unboxed value (fixnums not in a list, I hope this is so rare that it can be removed )
- integer
- unsigned integer
- symbol
- float
- Pointer to boxed value.
- 32 bit integer
- 32 bit unsigned integer
- 32 bit float
- (Maybe something else ? Vectors/strings allocated in memory not occupied by heap?)
- vector of int
- vector of uint
- vector of float
- vector of double
- String
13 pointer "types" -> needs 4 bits
for 64MB heap there are 6 free bits. So with this scheme going to 128MB or 256MB heap
is also possible
a pointer to some off heap vector/string could be represented by
[ptr | cdr]
|
[full pointer | Aux + GC_MARK]
|
[VECTOR]
Aux bits could be used for storing vector size. Up to 30bits should be available there
>> This is problematic. Now the information that something is a vector is split up
>> between 2 cons cells. This means GC needs both of these intact to be able to make
>> proper decision.
>> Will try to resolve this by adding some special symbols. But these must be symbols
>> that cannot occur normally in programs. Then an array could be:
[Full pointer | ARRAY_SYM + GC_MARK]
|
[VECTOR]
>> Boxed values same treatment as above.
>> TODO: Could this be simpler?
[ VALUE | TYPE_SYM + GC_MARK]
0000 00XX XXXX XXXX XXXX XXXX XXXX X000 : 0x03FF FFF8
1111 AA00 0000 0000 0000 0000 0000 0000 : 0xFC00 0000 (AA bits left unused for now, future heap growth?)
*/
/* Cell geometry: a cons cell is 8 bytes, so cell addresses are shifted by
   LBM_ADDRESS_SHIFT when packed into / unpacked from a pointer value, and
   unboxed payloads are shifted by LBM_VAL_SHIFT to make room for the tag bits. */
#define LBM_CONS_CELL_SIZE 8
#define LBM_ADDRESS_SHIFT 3
#define LBM_VAL_SHIFT 4
/* Bit 0 distinguishes pointers (set) from unboxed values (clear). */
#define LBM_PTR_MASK 0x00000001u
#define LBM_PTR_BIT 0x00000001u
/* Pointer layout: bits 3-25 hold the address part, bits 26-31 the pointer type. */
#define LBM_PTR_VAL_MASK 0x03FFFFF8u
#define LBM_PTR_TYPE_MASK 0xFC000000u
/* Pointer type tags (compared against value & LBM_PTR_TYPE_MASK). */
#define LBM_PTR_TYPE_CONS 0x10000000u
#define LBM_PTR_TYPE_BOXED_U 0x20000000u
#define LBM_PTR_TYPE_BOXED_I 0x30000000u
#define LBM_PTR_TYPE_BOXED_F 0x40000000u
#define LBM_PTR_TYPE_ARRAY 0xD0000000u
#define LBM_PTR_TYPE_REF 0xE0000000u
#define LBM_PTR_TYPE_STREAM 0xF0000000u
/* Bit 1 is the garbage collector mark bit. */
#define LBM_GC_MASK 0x00000002u
#define LBM_GC_MARKED 0x00000002u
/* Unboxed value layout: bits 4-31 hold the payload, bits 2-3 the value type. */
#define LBM_VAL_MASK 0xFFFFFFF0u
#define LBM_VAL_TYPE_MASK 0x0000000Cu
// Value type tags. Trailing columns show: type bits (3-2), gc bit (1), ptr bit (0).
#define LBM_VAL_TYPE_SYMBOL 0x00000000u // 00 0 0
/// Character or byte (both names share the same tag).
#define LBM_VAL_TYPE_CHAR 0x00000004u // 01 0 0
#define LBM_VAL_TYPE_BYTE 0x00000004u
#define LBM_VAL_TYPE_U 0x00000008u // 10 0 0
#define LBM_VAL_TYPE_I 0x0000000Cu // 11 0 0
/** Struct representing a heap cons-cell.
 *
 * A cell is a pair of lbm_value fields; each field holds either an unboxed
 * value or a tagged pointer (see the LBM_PTR_* and LBM_VAL_* macros).
 */
typedef struct {
lbm_value car; /// First field of the pair.
lbm_value cdr; /// Second field of the pair.
} lbm_cons_t;
/**
 * Heap state and garbage collection statistics.
 * A copy of this structure can be obtained with lbm_get_heap_state.
 */
typedef struct {
lbm_cons_t *heap; // Storage for the heap cells.
lbm_value freelist; // list of free cons cells.
lbm_stack_t gc_stack; // Stack used by the GC in place of recursion (see lbm_heap_init).
uint32_t heap_size; // In number of cells.
uint32_t heap_bytes; // In bytes.
uint32_t num_alloc; // Number of cells allocated.
uint32_t num_alloc_arrays; // Number of arrays allocated.
uint32_t gc_num; // Number of times gc has been performed.
uint32_t gc_marked; // Number of cells marked by mark phase.
uint32_t gc_recovered; // Number of cells recovered by sweep phase.
uint32_t gc_recovered_arrays;// Number of arrays recovered by sweep.
uint32_t gc_least_free; // The smallest length of the freelist.
uint64_t gc_time_acc; // Presumably accumulated GC duration (see lbm_heap_new_gc_time) - TODO confirm.
uint32_t gc_min_duration; // Presumably shortest GC duration reported so far - TODO confirm.
uint32_t gc_max_duration; // Presumably longest GC duration reported so far - TODO confirm.
} lbm_heap_state_t;
/**
 * The header portion of an array stored in array and symbol memory.
 */
typedef struct {
lbm_type elt_type; /// Type of elements: float, U, I or CHAR. NOTE(review): confirm which type tag constants are stored here.
uint32_t size; /// Number of elements
uint32_t *data; /// Pointer to the element storage: an lbm_memory array or a C array.
} lbm_array_header_t;
/** Initialize heap storage.
 * \param addr Pointer to an array of lbm_cons_t elements. This array must be at least 4-byte aligned.
 * \param num_cells Number of lbm_cons_t elements in the array.
 * \param gc_stack_storage uint32_t pointer to space to use as "recursion" stack for GC.
 * \param gc_stack_size Size of the gc_stack in number of words.
 * \return 1 on success or 0 for failure.
 */
extern int lbm_heap_init(lbm_cons_t *addr, uint32_t num_cells,
uint32_t *gc_stack_storage, uint32_t gc_stack_size);
/** Add a GC duration measurement to the heap statistics.
 *
 * \param dur Duration as reported by the timestamp callback.
 */
extern void lbm_heap_new_gc_time(uint32_t dur);
/** Add a new free-list length to the heap statistics.
 *
 * \param l Current length of freelist.
 */
extern void lbm_heap_new_freelist_length(uint32_t l);
/** Check how many lbm_cons_t cells are on the free-list.
 *
 * \return Number of free lbm_cons_t cells.
 */
extern unsigned int lbm_heap_num_free(void);
/** Check how many lbm_cons_t cells are allocated.
 *
 * \return Number of lbm_cons_t cells that are currently allocated.
 */
extern unsigned int lbm_heap_num_allocated(void);
/** Size of the heap in number of lbm_cons_t cells.
 *
 * \return Size of the heap in number of lbm_cons_t cells.
 */
extern unsigned int lbm_heap_size(void);
/** Size of the heap in bytes.
 *
 * \return Size of heap in bytes.
 */
extern unsigned int lbm_heap_size_bytes(void);
/** Allocate an lbm_cons_t cell from the heap.
 *
 * \param type A type that can be encoded onto the cell (most often LBM_PTR_TYPE_CONS).
 * \return An lbm_value referring to a cons cell or lbm_enc_sym(SYM_MERROR) in case the heap is full.
 */
extern lbm_value lbm_heap_allocate_cell(lbm_type type);
/** Decode an lbm_value representing a string into a C string.
 *
 * \param val Value
 * \return String or NULL if the value does not encode a string.
 */
extern char *lbm_dec_str(lbm_value val);
/** Decode an lbm_value representing a stream into an lbm_stream_t pointer.
 *
 * \param val Value
 * \return A pointer to an lbm_stream_t or NULL if the value does not encode a stream.
 */
extern lbm_stream_t *lbm_dec_stream(lbm_value val);
/** Decode a numerical value as if it is unsigned.
 *
 * \param val Value to decode
 * \return The value encoded in val cast to an unsigned int. Returns 0 if val does not encode a number.
 */
extern lbm_uint lbm_dec_as_u(lbm_value val);
/** Decode a numerical value as a signed integer.
 *
 * \param val Value to decode
 * \return The value encoded in val cast to a signed int. Returns 0 if val does not encode a number.
 */
extern lbm_int lbm_dec_as_i(lbm_value val);
/** Decode a numerical value as a float.
 *
 * \param val Value to decode.
 * \return The value encoded in val cast to a float. Returns 0 if val does not encode a number.
 */
extern lbm_float lbm_dec_as_f(lbm_value val);
/** Decode a value into a raw lbm_uint.
 *
 * NOTE(review): this function is undocumented in this header; the exact
 * decoding rules must be confirmed against its definition.
 *
 * \param v Value to decode.
 * \return The decoded lbm_uint.
 */
extern lbm_uint lbm_dec_raw(lbm_value v);
/** Allocates an lbm_cons_t cell from the heap and populates it.
 *
 * \param car The value to put in the car field of the allocated lbm_cons_t.
 * \param cdr The value to put in the cdr field of the allocated lbm_cons_t.
 * \return A value referencing the lbm_cons_t or lbm_enc_sym(SYM_MERROR) if heap is full.
 */
extern lbm_value lbm_cons(lbm_value car, lbm_value cdr);
/** Accesses the car field of an lbm_cons_t.
 *
 * \param cons Value
 * \return The car field of the lbm_cons_t if cons is a reference to a heap cell.
 * If cons is nil, the return value is nil. If the value
 * is not cons or nil, the return value is lbm_enc_sym(SYM_TERROR) for type error.
 */
extern lbm_value lbm_car(lbm_value cons);
/** Accesses the cdr field of an lbm_cons_t.
 *
 * \param cons Value
 * \return The cdr field of the lbm_cons_t if cons is a reference to a heap cell.
 * If cons is nil, the return value is nil. If the value
 * is not cons or nil, the return value is lbm_enc_sym(SYM_TERROR) for type error.
 */
extern lbm_value lbm_cdr(lbm_value cons);
/** Update the value stored in the car field of a heap cell.
 *
 * \param c Value referring to a heap cell.
 * \param v Value to replace the car field with.
 * \return 1 on success and 0 if the c value does not refer to a heap cell.
 */
extern int lbm_set_car(lbm_value c, lbm_value v);
/** Update the value stored in the cdr field of a heap cell.
 *
 * \param c Value referring to a heap cell.
 * \param v Value to replace the cdr field with.
 * \return 1 on success and 0 if the c value does not refer to a heap cell.
 */
extern int lbm_set_cdr(lbm_value c, lbm_value v);
// List functions
/** Calculate the length of a proper list.
 * \warning This is a dangerous function that should be used carefully. Cyclic structures on the heap
 * may lead to the function not terminating.
 *
 * \param c A list
 * \return The length of the list. Unless the value is a cyclic structure on the heap, this function will terminate.
 */
extern unsigned int lbm_list_length(lbm_value c);
/** Reverse a proper list.
 * \warning This is a dangerous function that should be used carefully. Cyclic structures on the heap
 * may lead to the function not terminating.
 *
 * \param list A list
 * \return The list reversed or lbm_enc_sym(SYM_MERROR) if heap is full.
 */
extern lbm_value lbm_list_reverse(lbm_value list);
/** Reverse a proper list, destroying the original.
 * \warning This is a dangerous function that should be used carefully. Cyclic structures on the heap
 * may lead to the function not terminating.
 *
 * \param list A list
 * \return The list reversed
 */
extern lbm_value lbm_list_destructive_reverse(lbm_value list);
/** Copy a list.
 * \warning This is a dangerous function that should be used carefully. Cyclic structures on the heap
 * may lead to the function not terminating.
 *
 * \param list A list.
 * \return A copy of the list or lbm_enc_sym(SYM_MERROR) if heap is full.
 */
extern lbm_value lbm_list_copy(lbm_value list);
/** A destructive append of two lists.
 *
 * \param list1 A list
 * \param list2 A list
 * \return list1 with list2 appended at the end.
 */
extern lbm_value lbm_list_append(lbm_value list1, lbm_value list2);
// State and statistics
/** Get a copy of the heap statistics structure.
 *
 * The single (unnamed) argument is a pointer to an lbm_heap_state_t that is
 * populated with the current statistics.
 */
extern void lbm_get_heap_state(lbm_heap_state_t *);
// Garbage collection
/** Increment the counter that is counting the number of times GC ran.
 *
 */
extern void lbm_gc_state_inc(void);
/** Mark all heap cells that are on the free-list.
 *
 * \return 1 on success or 0 if the free-list is corrupted.
 */
extern int lbm_gc_mark_freelist(void);
/** Mark heap cells reachable from the lbm_value v.
 *
 * \param v Root node to start marking from.
 * \return 1 on success and 0 if the stack used internally is full.
 */
extern int lbm_gc_mark_phase(lbm_value v);
/** Alternative mark phase.
 *
 * NOTE(review): undocumented in this header. Presumably marks cells reachable
 * from env in the same way as lbm_gc_mark_phase; confirm the behaviour and the
 * meaning of the return value against the definition.
 *
 * \param env Root value to start from.
 */
extern int lbm_gc_mark_phase2(lbm_value env);
/** Performs lbm_gc_mark_phase on all the values of an array.
 *
 * \param data Array of roots to traverse from.
 * \param n Number of elements in roots-array.
 * \return 1 on success or 0 for failure.
 */
extern int lbm_gc_mark_aux(lbm_uint *data, unsigned int n);
/** Sweep up all non-marked heap cells and place them on the free list.
 *
 * \return 1
 */
extern int lbm_gc_sweep_phase(void);
// Array functionality
/** Allocate an array in symbols and arrays memory (lispbm_memory.h)
 * and create a heap cell that refers to this array.
 * \param res The resulting lbm_value is returned through this argument.
 * \param size Array size in number of 32 bit words.
 * \param type The type information to encode onto the heap cell.
 * \return 1 for success or 0 for failure.
 */
extern int lbm_heap_allocate_array(lbm_value *res, unsigned int size, lbm_type type);
/** Extract the type tag of a value.
 *
 * Pointers (LBM_PTR_MASK bit set) carry their tag in the LBM_PTR_TYPE_MASK
 * bits; everything else carries it in the LBM_VAL_TYPE_MASK bits.
 *
 * \param x Value to check the type of.
 * \return The type information.
 */
static inline lbm_type lbm_type_of(lbm_value x) {
  if (x & LBM_PTR_MASK) {
    return x & LBM_PTR_TYPE_MASK;
  }
  return x & LBM_VAL_TYPE_MASK;
}
/** Check whether a value is a pointer.
 *
 * \param x Value to inspect.
 * \return true if the LBM_PTR_MASK bit of x is set, false otherwise.
 */
static inline bool lbm_is_ptr(lbm_value x) {
  return (x & LBM_PTR_MASK) != 0;
}
/** Build a cons-typed pointer value.
 *
 * \param x Address part; shifted left by LBM_ADDRESS_SHIFT before tagging.
 * \return x shifted and tagged with LBM_PTR_TYPE_CONS and LBM_PTR_BIT.
 */
static inline lbm_value lbm_enc_cons_ptr(lbm_uint x) {
  const lbm_uint tag = LBM_PTR_TYPE_CONS | LBM_PTR_BIT;
  return (x << LBM_ADDRESS_SHIFT) | tag;
}
/** Extract the address part of a pointer value.
 *
 * \param p Pointer value.
 * \return The LBM_PTR_VAL_MASK bits of p shifted right by LBM_ADDRESS_SHIFT.
 */
static inline lbm_uint lbm_dec_ptr(lbm_value p) {
  const lbm_value address_bits = p & LBM_PTR_VAL_MASK;
  return address_bits >> LBM_ADDRESS_SHIFT;
}
/** Replace the type tag of a pointer value.
 *
 * Only the address bits of p are kept; the GC bit and the old type are dropped.
 *
 * \param p Pointer value whose address bits are reused.
 * \param t Type tag to apply.
 * \return The address bits of p combined with t and LBM_PTR_BIT.
 */
static inline lbm_value lbm_set_ptr_type(lbm_value p, lbm_type t) {
  const lbm_value address_bits = p & LBM_PTR_VAL_MASK;
  return address_bits | t | LBM_PTR_BIT;
}
/** Encode a symbol id as an unboxed symbol value.
 *
 * \param s Symbol id.
 * \return s shifted left by LBM_VAL_SHIFT and tagged with LBM_VAL_TYPE_SYMBOL.
 */
static inline lbm_value lbm_enc_sym(uint32_t s) {
  const uint32_t payload = s << LBM_VAL_SHIFT;
  return payload | LBM_VAL_TYPE_SYMBOL;
}
/** Encode a signed integer as an unboxed value.
 *
 * The top LBM_VAL_SHIFT bits of x are lost by the shift.
 *
 * \param x Integer to encode.
 * \return x shifted left by LBM_VAL_SHIFT and tagged with LBM_VAL_TYPE_I.
 */
static inline lbm_value lbm_enc_i(lbm_int x) {
  const lbm_uint bits = (lbm_uint)x;
  return (bits << LBM_VAL_SHIFT) | LBM_VAL_TYPE_I;
}
/** Encode an unsigned integer as an unboxed value.
 *
 * The top LBM_VAL_SHIFT bits of x are lost by the shift.
 *
 * \param x Unsigned integer to encode.
 * \return x shifted left by LBM_VAL_SHIFT and tagged with LBM_VAL_TYPE_U.
 */
static inline lbm_value lbm_enc_u(lbm_uint x) {
  const lbm_uint payload = x << LBM_VAL_SHIFT;
  return payload | LBM_VAL_TYPE_U;
}
/** Encode a signed integer as a boxed value on the heap.
 *
 * A cell is allocated with x in the car and the SYM_BOXED_I_TYPE symbol in the
 * cdr; the resulting pointer is retagged as LBM_PTR_TYPE_BOXED_I.
 *
 * \param x Integer to box.
 * \return The boxed value, or the symbol returned by lbm_cons if it did not
 *         return a pointer.
 */
static inline lbm_value lbm_enc_I(lbm_int x) {
  const lbm_value cell = lbm_cons((lbm_uint)x, lbm_enc_sym(SYM_BOXED_I_TYPE));
  if (lbm_type_of(cell) != LBM_VAL_TYPE_SYMBOL) {
    return lbm_set_ptr_type(cell, LBM_PTR_TYPE_BOXED_I);
  }
  return cell; /* allocation reported a symbol; pass it through unchanged */
}
/** Encode an unsigned integer as a boxed value on the heap.
 *
 * A cell is allocated with x in the car and the SYM_BOXED_U_TYPE symbol in the
 * cdr; the resulting pointer is retagged as LBM_PTR_TYPE_BOXED_U.
 *
 * \param x Unsigned integer to box.
 * \return The boxed value, or the symbol returned by lbm_cons if it did not
 *         return a pointer.
 */
static inline lbm_value lbm_enc_U(lbm_uint x) {
  const lbm_value cell = lbm_cons(x, lbm_enc_sym(SYM_BOXED_U_TYPE));
  if (lbm_type_of(cell) != LBM_VAL_TYPE_SYMBOL) {
    return lbm_set_ptr_type(cell, LBM_PTR_TYPE_BOXED_U);
  }
  return cell; /* allocation reported a symbol; pass it through unchanged */
}
/** Encode a float as a boxed value on the heap.
 *
 * The bit pattern of x is copied into a word that is stored in the car of a
 * newly allocated cell whose cdr is the SYM_BOXED_F_TYPE symbol; the resulting
 * pointer is retagged as LBM_PTR_TYPE_BOXED_F.
 *
 * \param x Float to box.
 * \return The boxed value, or the symbol returned by lbm_cons if it did not
 *         return a pointer.
 */
static inline lbm_value lbm_enc_F(lbm_float x) {
  /* Start from zero so that any bytes of the word not covered by the copy
     below are well defined rather than indeterminate. */
  lbm_uint t = 0;
  memcpy(&t, &x, sizeof(float));
  lbm_value f = lbm_cons(t, lbm_enc_sym(SYM_BOXED_F_TYPE));
  if (lbm_type_of(f) == LBM_VAL_TYPE_SYMBOL) {
    return f; /* allocation reported a symbol; pass it through unchanged */
  }
  return lbm_set_ptr_type(f, LBM_PTR_TYPE_BOXED_F);
}
static inline lbm_value lbm_enc_char(char x) {
return ((lbm_uint)x << LBM_VAL_SHIFT) | LBM_VAL_TYPE_CHAR;
}
/** Decode an unboxed signed integer.
 *
 * The value is reinterpreted as signed before shifting so that the shift
 * operates on a signed operand.
 *
 * \param x Value to decode.
 * \return x cast to lbm_int and shifted right by LBM_VAL_SHIFT.
 */
static inline lbm_int lbm_dec_i(lbm_value x) {
  const lbm_int as_signed = (lbm_int)x;
  return as_signed >> LBM_VAL_SHIFT;
}
/** Decode an unboxed unsigned integer.
 *
 * \param x Value to decode.
 * \return x shifted right by LBM_VAL_SHIFT (tag bits discarded).
 */
static inline lbm_uint lbm_dec_u(lbm_value x) {
  const lbm_value payload = x >> LBM_VAL_SHIFT;
  return payload;
}
/** Decode an unboxed character.
 *
 * \param x Value to decode.
 * \return x shifted right by LBM_VAL_SHIFT and converted to char.
 */
static inline char lbm_dec_char(lbm_value x) {
  const lbm_value payload = x >> LBM_VAL_SHIFT;
  return (char)payload;
}
/** Decode the symbol id of an unboxed symbol value.
 *
 * \param x Value to decode.
 * \return x shifted right by LBM_VAL_SHIFT (tag bits discarded).
 */
static inline lbm_uint lbm_dec_sym(lbm_value x) {
  const lbm_value symbol_id = x >> LBM_VAL_SHIFT;
  return symbol_id;
}
/** Decode a boxed float.
 *
 * Use only when x is known to be a boxed float (LBM_PTR_TYPE_BOXED_F); no
 * type check is performed here.
 *
 * \param x Boxed float value.
 * \return The car of x reinterpreted bitwise as an lbm_float.
 */
static inline lbm_float lbm_dec_F(lbm_value x) {
  lbm_uint raw_bits = lbm_car(x);
  lbm_float result;
  /* Bitwise copy instead of a pointer cast to avoid aliasing problems. */
  memcpy(&result, &raw_bits, sizeof(lbm_float));
  return result;
}
/** Decode a boxed unsigned integer.
 *
 * No type check is performed; the car of x is returned as is.
 *
 * \param x Boxed unsigned value.
 * \return The result of lbm_car(x).
 */
static inline lbm_uint lbm_dec_U(lbm_value x) {
  const lbm_value stored = lbm_car(x);
  return stored;
}
/** Decode a boxed signed integer.
 *
 * No type check is performed; the car of x is converted to lbm_int.
 *
 * \param x Boxed signed value.
 * \return The result of lbm_car(x) cast to lbm_int.
 */
static inline lbm_int lbm_dec_I(lbm_value x) {
  const lbm_value stored = lbm_car(x);
  return (lbm_int)stored;
}
/** Return a copy of a value with the GC mark bit set.
 *
 * \param x Value to mark.
 * \return x with LBM_GC_MARKED or-ed in.
 */
static inline lbm_value lbm_set_gc_mark(lbm_value x) {
  lbm_value marked = x;
  marked |= LBM_GC_MARKED;
  return marked;
}
/** Return a copy of a value with the GC mark bit cleared.
 *
 * \param x Value to unmark.
 * \return x with the LBM_GC_MASK bit removed.
 */
static inline lbm_value lbm_clr_gc_mark(lbm_value x) {
  lbm_value unmarked = x;
  unmarked &= ~LBM_GC_MASK;
  return unmarked;
}
/** Check whether the GC mark bit of a value is set.
 *
 * \param x Value to inspect.
 * \return true if the LBM_GC_MASK bit of x is set, false otherwise.
 */
static inline bool lbm_get_gc_mark(lbm_value x) {
  return (x & LBM_GC_MASK) != 0;
}
/** Check whether a value is numeric.
 *
 * Unboxed I, U and CHAR values as well as boxed I, U and F values count as
 * numbers.
 *
 * \param x Value to inspect.
 * \return true if the type tag of x is one of the numeric tags.
 */
static inline bool lbm_is_number(lbm_value x) {
  const lbm_uint tag = lbm_type_of(x);
  switch (tag) {
  case LBM_VAL_TYPE_I:
  case LBM_VAL_TYPE_U:
  case LBM_VAL_TYPE_CHAR:
  case LBM_PTR_TYPE_BOXED_I:
  case LBM_PTR_TYPE_BOXED_U:
  case LBM_PTR_TYPE_BOXED_F:
    return true;
  default:
    return false;
  }
}
/** Check whether a value is an unboxed character.
 *
 * \param x Value to inspect.
 * \return true if the type tag of x is LBM_VAL_TYPE_CHAR.
 */
static inline bool lbm_is_char(lbm_value x) {
  return lbm_type_of(x) == LBM_VAL_TYPE_CHAR;
}
/** Check whether a value is a special symbol.
 *
 * \param symrep Value to inspect.
 * \return true if symrep is a symbol whose id is below SPECIAL_SYMBOLS_END.
 */
static inline bool lbm_is_special(lbm_value symrep) {
  if (lbm_type_of(symrep) != LBM_VAL_TYPE_SYMBOL) {
    return false;
  }
  return lbm_dec_sym(symrep) < SPECIAL_SYMBOLS_END;
}
/** Check whether a value is a symbol naming a fundamental operation.
 *
 * \param symrep Value to inspect.
 * \return true if symrep is a symbol whose id lies in the inclusive range
 *         FUNDAMENTALS_START .. FUNDAMENTALS_END.
 */
static inline bool lbm_is_fundamental(lbm_value symrep) {
  if (lbm_type_of(symrep) != LBM_VAL_TYPE_SYMBOL) {
    return false;
  }
  const lbm_uint id = lbm_dec_sym(symrep);
  return (id >= FUNDAMENTALS_START) && (id <= FUNDAMENTALS_END);
}
/** Check whether an expression is a closure.
 *
 * \param exp Value to inspect.
 * \return true if exp is a cons pointer whose car is the SYM_CLOSURE symbol.
 */
static inline bool lbm_is_closure(lbm_value exp) {
  if (lbm_type_of(exp) != LBM_PTR_TYPE_CONS) {
    return false;
  }
  const lbm_value head = lbm_car(exp);
  return (lbm_type_of(head) == LBM_VAL_TYPE_SYMBOL) &&
         (lbm_dec_sym(head) == SYM_CLOSURE);
}
/** Check whether an expression is a continuation.
 *
 * \param exp Value to inspect.
 * \return true if exp is a cons pointer whose car is the SYM_CONT symbol.
 */
static inline bool lbm_is_continuation(lbm_value exp) {
  if (lbm_type_of(exp) != LBM_PTR_TYPE_CONS) {
    return false;
  }
  const lbm_value head = lbm_car(exp);
  return (lbm_type_of(head) == LBM_VAL_TYPE_SYMBOL) &&
         (lbm_dec_sym(head) == SYM_CONT);
}
/** Check whether an expression is a macro.
 *
 * \param exp Value to inspect.
 * \return true if exp is a cons pointer whose car is the SYM_MACRO symbol.
 */
static inline bool lbm_is_macro(lbm_value exp) {
  if (lbm_type_of(exp) != LBM_PTR_TYPE_CONS) {
    return false;
  }
  const lbm_value head = lbm_car(exp);
  return (lbm_type_of(head) == LBM_VAL_TYPE_SYMBOL) &&
         (lbm_dec_sym(head) == SYM_MACRO);
}
/** Check whether an expression is a pattern-matching binder.
 *
 * \param exp Value to inspect.
 * \return true if exp is a cons pointer whose car is one of the SYM_MATCH_*
 *         symbols (ANY, I28, U28, I32, U32, FLOAT or CONS).
 */
static inline bool lbm_is_match_binder(lbm_value exp) {
  if (lbm_type_of(exp) != LBM_PTR_TYPE_CONS) {
    return false;
  }
  const lbm_value head = lbm_car(exp);
  if (lbm_type_of(head) != LBM_VAL_TYPE_SYMBOL) {
    return false;
  }
  const lbm_uint id = lbm_dec_sym(head);
  return (id == SYM_MATCH_ANY) ||
         (id == SYM_MATCH_I28) ||
         (id == SYM_MATCH_U28) ||
         (id == SYM_MATCH_I32) ||
         (id == SYM_MATCH_U32) ||
         (id == SYM_MATCH_FLOAT) ||
         (id == SYM_MATCH_CONS);
}
/** Check whether a value is an unboxed symbol.
 *
 * \param exp Value to inspect.
 * \return true if the type tag of exp is LBM_VAL_TYPE_SYMBOL.
 */
static inline bool lbm_is_symbol(lbm_value exp) {
  const lbm_type tag = lbm_type_of(exp);
  return tag == LBM_VAL_TYPE_SYMBOL;
}
/** Check whether a value is the nil symbol.
 *
 * \param exp Value to inspect.
 * \return true if exp is a symbol with id SYM_NIL.
 */
static inline bool lbm_is_symbol_nil(lbm_value exp) {
  if (!lbm_is_symbol(exp)) {
    return false;
  }
  return lbm_dec_sym(exp) == SYM_NIL;
}
/** Check whether a value is the eval symbol.
 *
 * \param exp Value to inspect.
 * \return true if exp is a symbol with id SYM_EVAL.
 */
static inline bool lbm_is_symbol_eval(lbm_value exp) {
  if (!lbm_is_symbol(exp)) {
    return false;
  }
  return lbm_dec_sym(exp) == SYM_EVAL;
}
/** Check whether a value is the memory-error symbol.
 *
 * \param exp Value to inspect.
 * \return true if exp is a symbol with id SYM_MERROR.
 */
static inline bool lbm_is_symbol_merror(lbm_value exp) {
  if (!lbm_is_symbol(exp)) {
    return false;
  }
  return lbm_dec_sym(exp) == SYM_MERROR;
}
/** Check whether a value is an error-signalling symbol.
 *
 * All error signalling symbols are in the range 0x20 - 0x2F. Masking away
 * only the low nibble and comparing with 0x20 accepts exactly that range;
 * a mask that also ignores other bits (e.g. bit 4) would wrongly accept ids
 * such as 0x30 - 0x3F.
 *
 * \param v Value to inspect.
 * \return true if v is a symbol whose id is in 0x20 - 0x2F, false otherwise.
 */
static inline bool lbm_is_error(lbm_value v){
  if (lbm_type_of(v) != LBM_VAL_TYPE_SYMBOL) {
    return false;
  }
  return (lbm_dec_sym(v) & 0xFFFFFFF0u) == 0x20u;
}
#endif