chia-blockchain/lib/chiavdf/fast_vdf/parameters.h

//have to pass one of these in as a macro
//#define VDF_MODE 0 //used for the final submission and correctness testing
//#define VDF_MODE 1 //used for performance or other testing

//also have to pass in one of these
//#define ENABLE_ALL_INSTRUCTIONS 1
//#define ENABLE_ALL_INSTRUCTIONS 0

//
//

//divide table
const int divide_table_index_bits=11;
const int gcd_num_quotient_bits=31; //excludes sign bit
const int data_size=31;
const int gcd_base_max_iter_divide_table=16;

//continued fraction table
const int gcd_table_num_exponent_bits=3;
const int gcd_table_num_fraction_bits=7;
const int gcd_base_max_iter=5;

#if ENABLE_ALL_INSTRUCTIONS==1
    const bool use_divide_table=true;
    const int gcd_base_bits=63;
    const int gcd_128_max_iter=2;
#else
    const bool use_divide_table=false;
    const int gcd_base_bits=50;
    const int gcd_128_max_iter=3;
#endif

/*
divide_table_index bits
10 - 0m1.269s
11 - 0m1.261s
12 - 0m1.262s
13 - 0m1.341s
**/

/*
gcd_base_max_iter_divide_table
13 - 0m1.290s
14 - 0m1.275s
15 - 0m1.265s
16 - 0m1.261s
17 - 0m1.268s
18 - 0m1.278s
19 - 0m1.283s
**/

/*
100k iterations; median of 3 runs. consistency between runs was very high

effect of scheduler:
taskset 0,1     : 0m1.352s (63% speedup single thread, 37% over 0,2)
taskset 0,2     : 0m1.850s
default         : 0m1.348s (fastest)
single threaded : 0m2.212s [this has gone down to 0m1.496s for some reason with the divide table]

exponent    fraction    base_bits   base_iter   128_iter    seconds
3           7           50          5           3           0m1.350s    [fastest with range checks enabled]
3           7           52          5           3           0m1.318s    [range checks disabled; 2.4% faster]

[this block with bmi and fma disabled]
3           7           46          5           3           0m1.426s
3           7           47          5           3           0m1.417s
3           7           48          5           3           0m1.421s
3           7           49          5           3           0m1.413s
3           7           50          5           3           0m1.401s    [still fastest; bmi+fma is 3.8% faster]
3           7           51          5           3           0m1.406s
3           7           52          5           3           0m1.460s
3           7           50          6           3           0m1.416s

3           7           49          6           3           0m1.376s

2           8           45          6           3           0m1.590s
2           8           49          6           3           0m1.485s
2           8           51          6           3           0m1.479s
2           8           52          6           3           0m1.501s
2           8           53          6           3           0m1.531s
2           8           54          6           3           0m13.675s
2           8           55          6           3           0m13.648s

3           7           49          2           3           0m14.571s
3           7           49          3           3           0m1.597s
3           7           49          4           3           0m1.430s
3           7           49          5           3           0m1.348s
3           7           49          6           3           0m1.376s
3           7           49          10          3           0m1.485s

3           7           49          1           18          0m2.226s
3           7           49          2           10          0m1.756s
3           7           49          3           6           0m1.557s
3           7           49          4           4           0m1.388s
3           7           49          5           4           0m1.525s
3           7           49          6           3           0m1.377s
3           7           49          7           3           0m1.446s
3           7           49          8           2           0m1.503s

3           6           45          4           3           0m15.176s
3           7           45          4           3           0m1.443s
3           8           45          4           3           0m1.386s
3           9           45          4           3           0m1.355s
3           10          45          4           3           0m1.353s
3           11          45          4           3           0m1.419s
3           12          45          4           3           0m1.451s
3           13          45          4           3           0m1.584s

3           7           40          4           2           0m1.611s
3           8           40          4           2           0m1.570s
3           9           40          4           2           0m1.554s
3           10          40          4           2           0m1.594s
3           11          40          4           2           0m1.622s
3           12          40          4           2           0m1.674s
3           13          40          4           2           0m1.832s

3           7           48          5           3           0m1.358s
3           7           49          5           3           0m1.353s
3           7           50          5           3           0m1.350s

3           8           48          5           3           0m1.366s
3           8           49          5           3           0m1.349s
3           8           50          5           3           0m1.334s

3           9           48          5           3           0m1.370s
3           9           49          5           3           0m1.349s
3           9           50          5           3           0m1.346s

3           10          48          5           3           0m1.404s
3           10          49          5           3           0m1.382s
3           10          50          5           3           0m1.379s
***/

const uint64 max_spin_counter=10000000;

//this value makes square_original not be called in 100k iterations. with every iteration reduced, minimum value is 1
const int num_extra_bits_ab=3;

const bool calculate_k_repeated_mod=false;
const bool calculate_k_repeated_mod_interval=1;

const int validate_interval=1; //power of 2. will check the discriminant in the slave thread at this interval. -1 to disable. no effect on performance
const int checkpoint_interval=10000; //at each checkpoint, the slave thread is restarted and the master thread calculates c
//checkpoint_interval=100000: 39388
//checkpoint_interval=10000:  39249 cycles per fast iteration
//checkpoint_interval=1000:   38939
//checkpoint_interval=100:    39988
//no effect on performance (with track cycles enabled)

// ==== test ====
#if VDF_MODE==1
    #define VDF_TEST
    const bool is_vdf_test=true;

    const bool enable_random_error_injection=false;
    const double random_error_injection_rate=0; //0 to 1

    //#define GENERATE_ASM_TRACKING_DATA
    //#define ENABLE_TRACK_CYCLES
    const bool vdf_test_correctness=false;
    const bool enable_threads=true;
#endif

// ==== production ====
#if VDF_MODE==0
    const bool is_vdf_test=false;

    const bool enable_random_error_injection=false;
    const double random_error_injection_rate=0; //0 to 1

    const bool vdf_test_correctness=false;
    const bool enable_threads=true;

    //#define ENABLE_TRACK_CYCLES
#endif

//
//

//this doesn't do anything outside of test code
//this doesn't work with the divide table currently
#define TEST_ASM

const int gcd_size=20; //multiple of 4. must be at least half the discriminant size in bits divided by 64

const int gcd_max_iterations=gcd_size*2; //typically 1 iteration per limb

const int max_bits_base=1024; //half the discriminant number of bits, rounded up
const int reduce_max_iterations=10000;

const int num_asm_tracking_data=128;
bool enable_all_instructions=ENABLE_ALL_INSTRUCTIONS;

//if the asm code doesn't use fma, the c code shouldn't either to be the same as the asm code
const bool enable_fma_in_c_code=ENABLE_ALL_INSTRUCTIONS;

const int track_cycles_num_buckets=24; //each bucket is from 2^i to 2^(i+1) cycles
const int track_cycles_max_num=128;

void mark_vdf_test() {
    static bool did_warning=false;
    if (!is_vdf_test && !did_warning) {
        print( "test code enabled in production build" );
        did_warning=true;
    }
}