From 5c7c5753ad4877e138359057809add1b8b3e0d01 Mon Sep 17 00:00:00 2001 From: fchirica Date: Fri, 27 Sep 2019 17:52:51 +0300 Subject: [PATCH 01/13] WIP PoT: creating some blocks now with fake genesis --- lib/aiter | 2 +- lib/bip158/lib/pybind11 | 2 +- lib/chiavdf/fast_vdf/ClassGroup.h | 85 + lib/chiavdf/fast_vdf/README.md | 68 + lib/chiavdf/fast_vdf/Reducer.h | 214 + lib/chiavdf/fast_vdf/asm_base.h | 133 + lib/chiavdf/fast_vdf/asm_compiled.s | 5613 +++++++++++++++++ lib/chiavdf/fast_vdf/asm_gcd_128.h | 670 ++ .../asm_gcd_base_continued_fractions.h | 375 ++ .../fast_vdf/asm_gcd_base_divide_table.h | 185 + lib/chiavdf/fast_vdf/asm_gcd_unsigned.h | 796 +++ lib/chiavdf/fast_vdf/asm_main.h | 250 + lib/chiavdf/fast_vdf/asm_types.h | 664 ++ lib/chiavdf/fast_vdf/asm_vm.h | 131 + lib/chiavdf/fast_vdf/bit_manipulation.h | 52 + lib/chiavdf/fast_vdf/compile.sh | 2 + lib/chiavdf/fast_vdf/compile_asm.cpp | 38 + lib/chiavdf/fast_vdf/copy_to_repo.sh | 2 + lib/chiavdf/fast_vdf/double_utility.h | 115 + lib/chiavdf/fast_vdf/gcd_128.h | 245 + .../fast_vdf/gcd_base_continued_fractions.h | 757 +++ lib/chiavdf/fast_vdf/gcd_base_divide_table.h | 232 + lib/chiavdf/fast_vdf/gcd_unsigned.h | 345 + lib/chiavdf/fast_vdf/generic.h | 252 + lib/chiavdf/fast_vdf/generic_macros.h | 34 + lib/chiavdf/fast_vdf/gpu_integer.h | 639 ++ lib/chiavdf/fast_vdf/gpu_integer_divide.h | 378 ++ lib/chiavdf/fast_vdf/gpu_integer_gcd.h | 118 + lib/chiavdf/fast_vdf/include.h | 58 + lib/chiavdf/fast_vdf/install.sh | 2 + lib/chiavdf/fast_vdf/install_child.sh | 23 + lib/chiavdf/fast_vdf/integer.h | 476 ++ lib/chiavdf/fast_vdf/nucomp.h | 191 + lib/chiavdf/fast_vdf/parameters.h | 207 + lib/chiavdf/fast_vdf/picosha2.h | 377 ++ lib/chiavdf/fast_vdf/run.sh | 2 + lib/chiavdf/fast_vdf/sconstruct | 24 + lib/chiavdf/fast_vdf/threading.h | 898 +++ lib/chiavdf/fast_vdf/upload.sh | 2 + lib/chiavdf/fast_vdf/vdf.cpp | 721 +++ lib/chiavdf/fast_vdf/vdf_fast.h | 1144 ++++ lib/chiavdf/fast_vdf/vdf_new.cpp | 30 + 
lib/chiavdf/fast_vdf/vdf_new.h | 435 ++ lib/chiavdf/fast_vdf/vdf_original.h | 321 + lib/chiavdf/fast_vdf/vdf_test.cpp | 438 ++ lib/chiavdf/fast_vdf/vdf_test.h | 316 + lib/chiavdf/inkfish/proof_of_time.py | 24 +- src/blockchain.py | 2 +- src/config/timelord.yaml | 2 +- src/timelord.py | 111 +- 50 files changed, 18130 insertions(+), 71 deletions(-) create mode 100644 lib/chiavdf/fast_vdf/ClassGroup.h create mode 100644 lib/chiavdf/fast_vdf/README.md create mode 100644 lib/chiavdf/fast_vdf/Reducer.h create mode 100644 lib/chiavdf/fast_vdf/asm_base.h create mode 100644 lib/chiavdf/fast_vdf/asm_compiled.s create mode 100644 lib/chiavdf/fast_vdf/asm_gcd_128.h create mode 100644 lib/chiavdf/fast_vdf/asm_gcd_base_continued_fractions.h create mode 100644 lib/chiavdf/fast_vdf/asm_gcd_base_divide_table.h create mode 100644 lib/chiavdf/fast_vdf/asm_gcd_unsigned.h create mode 100644 lib/chiavdf/fast_vdf/asm_main.h create mode 100644 lib/chiavdf/fast_vdf/asm_types.h create mode 100644 lib/chiavdf/fast_vdf/asm_vm.h create mode 100644 lib/chiavdf/fast_vdf/bit_manipulation.h create mode 100644 lib/chiavdf/fast_vdf/compile.sh create mode 100644 lib/chiavdf/fast_vdf/compile_asm.cpp create mode 100755 lib/chiavdf/fast_vdf/copy_to_repo.sh create mode 100644 lib/chiavdf/fast_vdf/double_utility.h create mode 100644 lib/chiavdf/fast_vdf/gcd_128.h create mode 100644 lib/chiavdf/fast_vdf/gcd_base_continued_fractions.h create mode 100644 lib/chiavdf/fast_vdf/gcd_base_divide_table.h create mode 100644 lib/chiavdf/fast_vdf/gcd_unsigned.h create mode 100644 lib/chiavdf/fast_vdf/generic.h create mode 100644 lib/chiavdf/fast_vdf/generic_macros.h create mode 100644 lib/chiavdf/fast_vdf/gpu_integer.h create mode 100644 lib/chiavdf/fast_vdf/gpu_integer_divide.h create mode 100644 lib/chiavdf/fast_vdf/gpu_integer_gcd.h create mode 100644 lib/chiavdf/fast_vdf/include.h create mode 100755 lib/chiavdf/fast_vdf/install.sh create mode 100755 lib/chiavdf/fast_vdf/install_child.sh create mode 100644 
lib/chiavdf/fast_vdf/integer.h create mode 100644 lib/chiavdf/fast_vdf/nucomp.h create mode 100644 lib/chiavdf/fast_vdf/parameters.h create mode 100644 lib/chiavdf/fast_vdf/picosha2.h create mode 100755 lib/chiavdf/fast_vdf/run.sh create mode 100755 lib/chiavdf/fast_vdf/sconstruct create mode 100644 lib/chiavdf/fast_vdf/threading.h create mode 100755 lib/chiavdf/fast_vdf/upload.sh create mode 100644 lib/chiavdf/fast_vdf/vdf.cpp create mode 100644 lib/chiavdf/fast_vdf/vdf_fast.h create mode 100644 lib/chiavdf/fast_vdf/vdf_new.cpp create mode 100644 lib/chiavdf/fast_vdf/vdf_new.h create mode 100644 lib/chiavdf/fast_vdf/vdf_original.h create mode 100644 lib/chiavdf/fast_vdf/vdf_test.cpp create mode 100644 lib/chiavdf/fast_vdf/vdf_test.h diff --git a/lib/aiter b/lib/aiter index b69ce716..28942e66 160000 --- a/lib/aiter +++ b/lib/aiter @@ -1 +1 @@ -Subproject commit b69ce7166f28e73a193b6f694ecf441c99240145 +Subproject commit 28942e66fda6397443aa33c0cb6515d8e8b991e7 diff --git a/lib/bip158/lib/pybind11 b/lib/bip158/lib/pybind11 index c9d32a81..34c2281e 160000 --- a/lib/bip158/lib/pybind11 +++ b/lib/bip158/lib/pybind11 @@ -1 +1 @@ -Subproject commit c9d32a81f40ad540015814edf13b29980c63e39c +Subproject commit 34c2281e315c51f5270321101dc733c1cf26214f diff --git a/lib/chiavdf/fast_vdf/ClassGroup.h b/lib/chiavdf/fast_vdf/ClassGroup.h new file mode 100644 index 00000000..42b75fac --- /dev/null +++ b/lib/chiavdf/fast_vdf/ClassGroup.h @@ -0,0 +1,85 @@ +/** +Copyright (C) 2018 Markku Pulkkinen + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +**/ + +#ifndef CLASSGROUP_H +#define CLASSGROUP_H + +#include +#include "gmp.h" + +/** + * @brief The ClassGroup data struct for VDF variables a, b, c and discriminant. + * Optimal size because it fits into single entry of 64 byte wide cache line. + */ +struct alignas(64) ClassGroup { + mpz_t a; + mpz_t b; + mpz_t c; + mpz_t d; +}; + +/** + * @brief ClassGroupContext struct - placeholder for variables + * in classgroup arithmetic operations. Uses four cache + * line entries, 256 bytes. + */ +struct alignas(64) ClassGroupContext { + mpz_t a; + mpz_t b; + mpz_t c; + mpz_t mu; + + mpz_t m; + mpz_t r; + mpz_t s; + mpz_t faa; + + mpz_t fab; + mpz_t fac; + mpz_t fba; + mpz_t fbb; + + mpz_t fbc; + mpz_t fca; + mpz_t fcb; + mpz_t fcc; + + ClassGroupContext(uint32_t numBits = 4096) { + mpz_init2(a, numBits); + mpz_init2(b, numBits); + mpz_init2(c, numBits); + mpz_init2(mu, numBits); + mpz_init2(m, numBits); + mpz_init2(r, numBits); + mpz_init2(s, numBits); + mpz_init2(faa, numBits); + mpz_init2(fab, numBits); + mpz_init2(fac, numBits); + mpz_init2(fba, numBits); + mpz_init2(fbb, numBits); + mpz_init2(fbc, numBits); + mpz_init2(fca, numBits); + mpz_init2(fcb, numBits); + mpz_init2(fcc, numBits); + } + + ~ClassGroupContext() { + mpz_clears(a, b, c, mu, m, r, s, faa, fab, fac, fba, fbb, fbc, fca, fcb, + fcc, NULL); + } +}; + +#endif // CLASSGROUP_H diff --git a/lib/chiavdf/fast_vdf/README.md b/lib/chiavdf/fast_vdf/README.md new file mode 100644 index 00000000..66af1c47 --- /dev/null +++ b/lib/chiavdf/fast_vdf/README.md @@ -0,0 +1,68 @@ +Copyright 2018 Ilya Gorodetskov +generic@sundersoft.com + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=== Summary === + +The NUDUPL algorithm is used. The equations are based on cryptoslava's equations from the previous contest. They were modified slightly to increase the level of parallelism. + +The GCD is a custom implementation with scalar integers. There are two base cases: one uses a lookup table with continued fractions and the other uses the euclidean algorithm with a division table. The division table algorithm is slightly faster even though it has about 2x as many iterations. + +After the base case, there is a 128 bit GCD that generates 64 bit cofactor matrices with Lehmer's algorithm. This is required to make the long integer multiplications efficient (Flint's implementation doesn't do this). + +The GCD also implements Flint's partial xgcd function, but the output is slightly different. This implementation will always return an A value which is > the threshold and a B value which is <= the threshold. For a normal GCD, the threshold is 0, B is 0, and A is the GCD. Also the interfaces are slightly different. + +Scalar integers are used for the GCD. I don't expect any speedup for the SIMD integers that were used in the last implementation since the GCD only uses 64x1024 multiplications, which are too small and have too high of a carry overhead for the SIMD version to be faster. In either case, most of the time seems to be spent in the base case so it shouldn't matter too much. + +If SIMD integers are used with AVX-512, doubles have to be used because the multiplier sizes for doubles are significantly larger than for integers. 
There is an AVX-512 extension to support larger integer multiplications but no processor implements it yet. It should be possible to do a 50 bit multiply-add into a 100 bit accumulator with 4 fused multiply-adds if the accumulators have a special nonzero initial value and the inputs are scaled before the multiplication. This would make AVX-512 about 2.5x faster than scalar code for 1024x1024 integer multiplications (assuming the scalar code is unrolled and uses ADOX/ADCX/MULX properly, and the CPU can execute this at 1 cycle per iteration which it probably can't). + +The GCD is parallelized by calculating the cofactors in a separate slave thread. The master thread will calculate the cofactor matrices and send them to the slave thread. Other calculations are also parallelized. + +The VDF implementation from the first contest is still used as a fallback and is called about once every 5000 iterations. The GCD will encounter large quotients about this often and these are not implemented. This has a negligible effect on performance. Also, the NUDUPL case where A<=L is not implemented; it will fall back to the old implementation in this case (this never happens outside of the first 20 or so iterations). + +There is also corruption detection by calculating C with a non-exact division and making sure the remainder is 0. This detected all injected random corruptions that I tested. No corruptions caused by bugs were observed during testing. This cannot correct for the sign of B being wrong. + +=== GCD continued fraction lookup table === + +This is implemented in gcd_base_continued_fractions.h and asm_gcd_base_continued_fractions.h. The division table implementation is the same as the previous entry and was discussed there. Currently the division table is only used if AVX2 is enabled but it could be ported to SSE or scalar code easily. Both implementations have about the same performance. 
+ +The initial quotient sequence of gcd(a,b) is the same as the initial quotient sequence of gcd(a*2^n/b, 2^n) for any n. This is because the GCD quotients are the same as the continued fraction quotients of a/b, and the initial continued fraction quotients only depend on the initial bits of a/b. This makes it feasible to have a lookup table since it now only has one input. + +a*2^n/b is calculated by doing a double precision division of a/b, and then truncating the lower bits. Some of the exponent bits are used in the table in addition to the fraction bits; this makes each slot of the table vary in size depending on what the exponent is. If the result is outside the table bounds, then the division result is floored to fall back to the euclidean algorithm (this is very rare). + +The table is calculated by iterating all of the possible continued fractions that have a certain initial quotient sequence. Iteration ends when all of these fractions are either outside the table or they don't fully contain at least one slot of the table. Each slot that is fully contained by such a fraction is updated so that its quotient sequence equals the fraction's initial quotient sequence. Once this is complete, the cofactor matrices are calculated from the quotient sequences. Each cofactor matrix is 4 doubles. + +The resulting code seems to have too many instructions so it doesn't perform very well. There might be some way to optimize it. It was written for SSE so that it would run on both processors. + +This might work better on an FPGA possibly with low latency DRAM or SRAM (compared to the euclidean algorithm with a division table). There is no limit to the size of the table but doubling the latency would require the number of bits in the table to also be doubled to have the same performance. + +=== Other GCD code === + +The gcd_128 function calculates a 128 bit GCD using Lehmer's algorithm. It is pretty straightforward and uses only unsigned arithmetic. 
Each cofactor matrix can only have two possible signs: [+ -; - +] or [- +; + -]. The gcd_unsigned function uses unsigned arithmetic and a jump table to apply the 64-bit cofactor matrices to the A and B values. It uses ADOX/ADCX/MULX if they are available and falls back to ADC/MUL otherwise. It will track the last known size of A to speed up the bit shifts required to get the top 128 bits of A. + +No attempt was made to try to do the A and B long integer multiplications on a separate thread; I wouldn't expect any performance improvement from this. + +=== Threads === + +There is a master thread and a slave thread. The slave thread only exists for each batch of 5000 or so squarings and is then destroyed and recreated for the next batch (this has no measurable overhead). If the original VDF is used as a fallback, the batch ends and the slave thread is destroyed. + +Each thread has a 64-bit counter that only it can write to. Also, during a squaring iteration, it will not overwrite any value that it has previously written and transmitted to the other thread. Each squaring is split up into phases. Each thread will update its counter at the start of the phase (the counter can only be increased, not decreased). It can then wait on the other thread's counter to reach a certain value as part of a spin loop. If the spin loop takes too long, an error condition is raised and the batch ends; this should prevent any deadlocks from happening. + +No CPU fences or atomics are required since each value can only be written to by one thread and since x86 enforces acquire/release ordering on all memory operations. Compiler memory fences are still required to prevent the compiler from caching or reordering memory operations. + +The GCD master thread will increment the counter when a new cofactor matrix has been outputted. The slave thread will spin on this counter and then apply the cofactor matrix to the U or V vector to get a new U or V vector. 
+ +It was attempted to use modular arithmetic to calculate k directly but this slowed down the program due to GMP's modulo or integer multiply operations not having enough performance. This also makes the integer multiplications bigger. + +The speedup isn't very high since most of the time is spent in the GCD base case and these can't be parallelized. \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/Reducer.h b/lib/chiavdf/fast_vdf/Reducer.h new file mode 100644 index 00000000..e93ba343 --- /dev/null +++ b/lib/chiavdf/fast_vdf/Reducer.h @@ -0,0 +1,214 @@ +/** +Copyright (C) 2019 Markku Pulkkinen + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +**/ + +#ifndef REDUCER_H +#define REDUCER_H + +#include +#include +#include + +#include "ClassGroup.h" + +/** constants utilized in reduction algorithm */ +namespace { +const int_fast64_t THRESH{1ul << 31}; +const int_fast64_t EXP_THRESH{31}; +} + +/** + * @brief The Reducer class that does custom reduce operation for VDF + * repeated squaring algorithm. The implementation is based on + * Akashnil VDF competition entry and further optimized for speed. + */ +class alignas(64) Reducer { +public: + /** + * @brief Reducer - constructs by using reference into cg context. 
+ */ + Reducer(ClassGroupContext &ctx_) : ctx(ctx_) {} + + ~Reducer() {} + + /** + * @brief run - runs reduction algorithm for cg context params + */ + inline void run() { + while (!isReduced()) { + int_fast64_t a, b, c; + { + int_fast64_t a_exp, b_exp, c_exp; + mpz_get_si_2exp(a, a_exp, ctx.a); + mpz_get_si_2exp(b, b_exp, ctx.b); + mpz_get_si_2exp(c, c_exp, ctx.c); + auto mm = std::minmax({a_exp, b_exp, c_exp}); + if (mm.second - mm.first > EXP_THRESH) { + reducer(); + continue; + } + // Ensure a, b, c are shifted so that a : b : c ratios are same as + // f.a : f.b : f.c. a, b, c will be used as approximations to f.a, + // f.b, f.c + int_fast64_t max_exp(++mm.second); // pre-increment: one spare bit for safety vs overflow + a >>= (max_exp - a_exp); + b >>= (max_exp - b_exp); + c >>= (max_exp - c_exp); + } + { + int_fast64_t u, v, w, x; + calc_uvwx(u, v, w, x, a, b, c); + + mpz_mul_si(ctx.faa, ctx.a, u * u); + mpz_mul_si(ctx.fab, ctx.b, u * w); + mpz_mul_si(ctx.fac, ctx.c, w * w); + + mpz_mul_si(ctx.fba, ctx.a, u * v << 1); + mpz_mul_si(ctx.fbb, ctx.b, u * x + v * w); + mpz_mul_si(ctx.fbc, ctx.c, w * x << 1); + + mpz_mul_si(ctx.fca, ctx.a, v * v); + mpz_mul_si(ctx.fcb, ctx.b, v * x); + mpz_mul_si(ctx.fcc, ctx.c, x * x); + + mpz_add(ctx.a, ctx.faa, ctx.fab); + mpz_add(ctx.a, ctx.a, ctx.fac); + + mpz_add(ctx.b, ctx.fba, ctx.fbb); + mpz_add(ctx.b, ctx.b, ctx.fbc); + + mpz_add(ctx.c, ctx.fca, ctx.fcb); + mpz_add(ctx.c, ctx.c, ctx.fcc); + } + } + } + +private: + + inline void signed_shift(uint64_t op, int64_t shift, int_fast64_t &r) { + if (shift > 0) + r = static_cast(op << shift); + else if (shift <= -64) + r = 0; + else + r = static_cast(op >> (-shift)); + } + + inline void mpz_get_si_2exp(int_fast64_t &r, int_fast64_t &exp, + const mpz_t op) { + // Return an approximation x of the large mpz_t op by an int64_t and the + // exponent e adjustment. We must have (x * 2^e) / op = constant + // approximately. 
+ int_fast64_t size(static_cast(mpz_size(op))); + uint_fast64_t last(mpz_getlimbn(op, (size - 1))); + int_fast64_t lg2 = exp = ((63 - __builtin_clzll(last)) + 1); + signed_shift(last, (63 - exp), r); + if (size > 1) { + exp += (size - 1) * 64; + uint_fast64_t prev(mpz_getlimbn(op, (size - 2))); + int_fast64_t t; + signed_shift(prev, -1 - lg2, t); + r += t; + } + if (mpz_sgn(op) < 0) + r = -r; + } + + inline bool isReduced() { + int a_b(mpz_cmpabs(ctx.a, ctx.b)); + int c_b(mpz_cmpabs(ctx.c, ctx.b)); + if (a_b < 0 || c_b < 0) + return false; + + int a_c(mpz_cmp(ctx.a, ctx.c)); + if (a_c > 0) { + mpz_swap(ctx.a, ctx.c); + mpz_neg(ctx.b, ctx.b); + } else if (a_c == 0 && mpz_sgn(ctx.b) < 0) { + mpz_neg(ctx.b, ctx.b); + } + return true; + } + + inline void reducer() { + // (c + b)/2c == (1 + (b/c))/2 -> s + mpz_mdiv(ctx.r, ctx.b, ctx.c); + mpz_add_ui(ctx.r, ctx.r, 1); + mpz_div_2exp(ctx.s, ctx.r, 1); + // cs -> m + mpz_mul(ctx.m, ctx.c, ctx.s); + // 2cs -> r + mpz_mul_2exp(ctx.r, ctx.m, 1); + // (cs - b) -> m + mpz_sub(ctx.m, ctx.m, ctx.b); + + // new b = -b + 2cs + mpz_sub(ctx.b, ctx.r, ctx.b); + // new a = c, c = a + mpz_swap(ctx.a, ctx.c); + // new c = c + cs^2 - bs ( == c + (s * ( cs - b))) + mpz_addmul(ctx.c, ctx.s, ctx.m); + } + + inline void calc_uvwx(int_fast64_t &u, int_fast64_t &v, int_fast64_t &w, + int_fast64_t &x, int_fast64_t &a, int_fast64_t &b, + int_fast64_t &c) { + // We must be very careful about overflow in the following steps + int below_threshold; + int_fast64_t u_{1}, v_{0}, w_{0}, x_{1}; + int_fast64_t a_, b_, s; + do { + u = u_; + v = v_; + w = w_; + x = x_; + + s = static_cast( + (floorf(b / (static_cast(c))) + 1)) >> + 1; + + a_ = a; + b_ = b; + // cs = c * s; + + // a = c + a = c; + // b = -b + 2cs + b = -b + (c * s << 1); + // c = a + cs^2 - bs + c = a_ - s * (b_ - c * s); + + u_ = v; + v_ = -u + s * v; + w_ = x; + x_ = -w + s * x; + + // The condition (abs(v_) | abs(x_)) <= THRESH protects against + // overflow + below_threshold = (abs(v_) | 
abs(x_)) <= THRESH ? 1 : 0; + } while (below_threshold && a > c && c > 0); + + if (below_threshold) { + u = u_; + v = v_; + w = w_; + x = x_; + } + } + + ClassGroupContext &ctx; +}; + +#endif // REDUCER_H diff --git a/lib/chiavdf/fast_vdf/asm_base.h b/lib/chiavdf/fast_vdf/asm_base.h new file mode 100644 index 00000000..f414305d --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_base.h @@ -0,0 +1,133 @@ +#ifdef GENERATE_ASM_TRACKING_DATA + const bool generate_asm_tracking_data=true; +#else + const bool generate_asm_tracking_data=false; +#endif + +namespace asm_code { + + +string track_asm(string comment, string jump_to = "") { + if (!generate_asm_tracking_data) { + return jump_to; + } + + mark_vdf_test(); + + static map id_map; + static int next_id=1; + + int& id=id_map[comment]; + if (id==0) { + id=next_id; + ++next_id; + } + + assert(id>=1 && id<=num_asm_tracking_data); + + // + // + + static bool init=false; + if (!init) { + APPEND_M(str( ".data" )); + APPEND_M(str( ".balign 8" )); + + APPEND_M(str( "track_asm_rax: .quad 0" )); + + //APPEND_M(str( ".global asm_tracking_data" )); + //APPEND_M(str( "asm_tracking_data:" )); + //for (int x=0;x, string> constant_map; + string& name=constant_map[make_pair(value_bits_0, value_bits_1)]; + + if (name.empty()) { + name=m.alloc_label(); + +#ifdef CHIAOSX + APPEND_M(str( ".text " )); +#else + APPEND_M(str( ".text 1" )); +#endif + APPEND_M(str( ".balign 16" )); + APPEND_M(str( "#:", name )); + APPEND_M(str( ".quad #", to_hex(value_bits_0) )); //lane 0 + APPEND_M(str( ".quad #", to_hex(value_bits_1) )); //lane 1 + APPEND_M(str( ".text" )); + } +#ifdef CHIAOSX + return (use_brackets)? str( "[RIP+#]", name ) : name; +#else + return (use_brackets)? 
str( "[#]", name ) : name; +#endif +} + +string constant_address_double(double value_0, double value_1, bool use_brackets=true) { + uint64 value_bits_0=*(uint64*)&value_0; + uint64 value_bits_1=*(uint64*)&value_1; + return constant_address_uint64(value_bits_0, value_bits_1, use_brackets); +} + + +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/asm_compiled.s b/lib/chiavdf/fast_vdf/asm_compiled.s new file mode 100644 index 00000000..5f7ec1f5 --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_compiled.s @@ -0,0 +1,5613 @@ +Xx_1: .intel_syntax noprefix # asm_function:57 .intel_syntax noprefix +Xx_2: .global _asm_func_gcd_base # asm_function:64 .global _asm_func_gcd_base +Xx_3: _asm_func_gcd_base: # asm_function:65 _asm_func_gcd_base: +Xx_4: PUSH RBP # asm_function:84 PUSH RBP +Xx_5: PUSH RBX # asm_function:84 PUSH RBX +Xx_6: PUSH R12 # asm_function:84 PUSH R12 +Xx_7: PUSH R13 # asm_function:84 PUSH R13 +Xx_8: PUSH R14 # asm_function:84 PUSH R14 +Xx_9: PUSH R15 # asm_function:84 PUSH R15 +Xx_10: MOV RAX, RSP # asm_function:96 MOV RAX, RSP +Xx_11: AND RSP, -64 # asm_function:97 AND RSP, -64 +Xx_12: SUB RSP, 64 # asm_function:98 SUB RSP, 64 +Xx_13: MOV [RSP], RAX # asm_function:99 MOV [RSP], RAX +Xx_14: MOVDQU XMM0, [RDI] # compile_asm_gcd_base:38 MOVDQU `ab, [`ab_addr] +Xx_15: MOVDQU XMM1, [RSI] # compile_asm_gcd_base:39 MOVDQU `u, [`u_addr] +Xx_16: MOVDQU XMM2, [RDX] # compile_asm_gcd_base:40 MOVDQU `v, [`v_addr] +Xx_17: MOVDQU XMM3, [RCX] # compile_asm_gcd_base:41 MOVDQU `is_lehmer, [`is_lehmer_addr] +Xx_18: MOVDQU XMM4, [R8] # compile_asm_gcd_base:42 MOVDQU `ab_threshold, [`ab_threshold_addr] +Xx_19: .text # gcd_base_continued_fraction:82 .text +Xx_20: .balign 64 # gcd_base_continued_fraction:86 .balign 64 +Xx_21: gcd_base_table: # gcd_base_continued_fraction:87 gcd_base_table: +Xx_22: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 
+Xx_23: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_24: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_25: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_26: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_27: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_28: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_29: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_30: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_31: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_32: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_33: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xbff0000000000000 +Xx_34: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_35: .quad 0x3ff0000000000000, 0xc022000000000000, 0xbff0000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc022000000000000, 0xbff0000000000000, 0x4024000000000000 +Xx_36: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_37: .quad 0x3ff0000000000000, 0xc020000000000000, 0xbff0000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc020000000000000, 0xbff0000000000000, 0x4022000000000000 +Xx_38: .quad 0xc01c000000000000, 0x4020000000000000, 0x4020000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xc01c000000000000, 0x4020000000000000, 0x4020000000000000, 0xc022000000000000 +Xx_39: .quad 0x3ff0000000000000, 0xc01c000000000000, 0xbff0000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc01c000000000000, 0xbff0000000000000, 0x4020000000000000 +Xx_40: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_41: .quad 0x3ff0000000000000, 0xc018000000000000, 0xbff0000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc018000000000000, 0xbff0000000000000, 0x401c000000000000 +Xx_42: .quad 0x3ff0000000000000, 0xc018000000000000, 0xbff0000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc018000000000000, 0xbff0000000000000, 0x401c000000000000 +Xx_43: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xbff0000000000000 +Xx_44: .quad 0xc014000000000000, 0x4018000000000000, 0x4018000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xc014000000000000, 0x4018000000000000, 0x4018000000000000, 0xc01c000000000000 +Xx_45: .quad 0x3ff0000000000000, 0xc014000000000000, 0xbff0000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc014000000000000, 0xbff0000000000000, 0x4018000000000000 +Xx_46: .quad 0x3ff0000000000000, 0xc014000000000000, 0xbff0000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc014000000000000, 0xbff0000000000000, 0x4018000000000000 +Xx_47: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_48: .quad 0xc010000000000000, 0x4014000000000000, 0x4014000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0xc010000000000000, 0x4014000000000000, 0x4014000000000000, 0xc018000000000000 +Xx_49: .quad 0xc010000000000000, 0x4014000000000000, 0x4014000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0xc010000000000000, 0x4014000000000000, 0x4014000000000000, 0xc018000000000000 +Xx_50: .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 +Xx_51: .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 +Xx_52: .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 +Xx_53: .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xbff0000000000000, 0x4014000000000000 +Xx_54: .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 +Xx_55: .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 +Xx_56: .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4010000000000000, 0xc014000000000000 +Xx_57: .quad 0x4010000000000000, 0xc01c000000000000, 0xc014000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x4010000000000000, 0xc01c000000000000, 0xc014000000000000, 0x4022000000000000 +Xx_58: .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 +Xx_59: .quad 0xc008000000000000, 0x401c000000000000, 0x4010000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x401c000000000000, 0x4010000000000000, 0xc022000000000000 +Xx_60: .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 +Xx_61: .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 +Xx_62: .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 
0xbff0000000000000, 0x4010000000000000 +Xx_63: .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xbff0000000000000, 0x4010000000000000 +Xx_64: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xbff0000000000000 +Xx_65: .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 +Xx_66: .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 +Xx_67: .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 +Xx_68: .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4008000000000000, 0xc010000000000000 +Xx_69: .quad 0x4008000000000000, 0xc020000000000000, 0xc010000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc020000000000000, 0xc010000000000000, 0x4026000000000000 +Xx_70: .quad 0xc014000000000000, 0x4020000000000000, 0x401c000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xc014000000000000, 0x4020000000000000, 0x401c000000000000, 0xc026000000000000 +Xx_71: .quad 0x4008000000000000, 0xc014000000000000, 0xc010000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc014000000000000, 0xc010000000000000, 0x401c000000000000 +Xx_72: .quad 0x4008000000000000, 0xc014000000000000, 0xc010000000000000, 
0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc014000000000000, 0xc010000000000000, 0x401c000000000000 +Xx_73: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_74: .quad 0xc000000000000000, 0x4014000000000000, 0x4008000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4014000000000000, 0x4008000000000000, 0xc01c000000000000 +Xx_75: .quad 0xc000000000000000, 0x4014000000000000, 0x4008000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4014000000000000, 0x4008000000000000, 0xc01c000000000000 +Xx_76: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_77: .quad 0xc000000000000000, 0x401c000000000000, 0x4008000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x401c000000000000, 0x4008000000000000, 0xc024000000000000 +Xx_78: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_79: .quad 0xc000000000000000, 0x4022000000000000, 0x4008000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4022000000000000, 0x4008000000000000, 0xc02a000000000000 +Xx_80: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_81: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 
0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_82: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_83: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_84: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_85: .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xbff0000000000000, 0x4008000000000000 +Xx_86: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_87: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_88: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_89: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_90: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_91: .quad 
0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_92: .quad 0x4000000000000000, 0xc022000000000000, 0xc008000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc022000000000000, 0xc008000000000000, 0x402c000000000000 +Xx_93: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_94: .quad 0x4000000000000000, 0xc01c000000000000, 0xc008000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc01c000000000000, 0xc008000000000000, 0x4026000000000000 +Xx_95: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_96: .quad 0x4000000000000000, 0xc014000000000000, 0xc008000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc014000000000000, 0xc008000000000000, 0x4020000000000000 +Xx_97: .quad 0x4000000000000000, 0xc014000000000000, 0xc008000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc014000000000000, 0xc008000000000000, 0x4020000000000000 +Xx_98: .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4000000000000000, 0xc008000000000000 +Xx_99: .quad 0xc008000000000000, 0x4014000000000000, 0x4014000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4014000000000000, 0x4014000000000000, 0xc020000000000000 +Xx_100: .quad 0xc008000000000000, 0x4014000000000000, 0x4014000000000000, 0xc020000000000000 # 
gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4014000000000000, 0x4014000000000000, 0xc020000000000000 +Xx_101: .quad 0x4014000000000000, 0xc020000000000000, 0xc020000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x4014000000000000, 0xc020000000000000, 0xc020000000000000, 0x402a000000000000 +Xx_102: .quad 0xc008000000000000, 0x4020000000000000, 0x4014000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4020000000000000, 0x4014000000000000, 0xc02a000000000000 +Xx_103: .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 +Xx_104: .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 +Xx_105: .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 +Xx_106: .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc008000000000000, 0x4014000000000000 +Xx_107: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_108: .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 +Xx_109: .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 
0x4000000000000000, 0xc014000000000000 +Xx_110: .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 +Xx_111: .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 +Xx_112: .quad 0x4008000000000000, 0xc01c000000000000, 0xc014000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc01c000000000000, 0xc014000000000000, 0x4028000000000000 +Xx_113: .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4000000000000000, 0xc014000000000000 +Xx_114: .quad 0xc010000000000000, 0x401c000000000000, 0x401c000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0xc010000000000000, 0x401c000000000000, 0x401c000000000000, 0xc028000000000000 +Xx_115: .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 +Xx_116: .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 +Xx_117: .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc014000000000000, 0x401c000000000000 +Xx_118: .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 +Xx_119: .quad 0xbff0000000000000, 
0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 +Xx_120: .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 +Xx_121: .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4000000000000000, 0xc01c000000000000 +Xx_122: .quad 0x4010000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x4010000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4022000000000000 +Xx_123: .quad 0x4010000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x4010000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4022000000000000 +Xx_124: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_125: .quad 0xbff0000000000000, 0x4014000000000000, 0x4000000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4014000000000000, 0x4000000000000000, 0xc022000000000000 +Xx_126: .quad 0xbff0000000000000, 0x4014000000000000, 0x4000000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4014000000000000, 0x4000000000000000, 0xc022000000000000 +Xx_127: .quad 0x4014000000000000, 0xc018000000000000, 0xc022000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4014000000000000, 0xc018000000000000, 0xc022000000000000, 0x4026000000000000 +Xx_128: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_129: .quad 0xbff0000000000000, 0x4018000000000000, 0x4000000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4018000000000000, 0x4000000000000000, 0xc026000000000000 +Xx_130: .quad 0xbff0000000000000, 0x4018000000000000, 0x4000000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4018000000000000, 0x4000000000000000, 0xc026000000000000 +Xx_131: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_132: .quad 0xbff0000000000000, 0x401c000000000000, 0x4000000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x401c000000000000, 0x4000000000000000, 0xc02a000000000000 +Xx_133: .quad 0x401c000000000000, 0xc020000000000000, 0xc02a000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x401c000000000000, 0xc020000000000000, 0xc02a000000000000, 0x402e000000000000 +Xx_134: .quad 0xbff0000000000000, 0x4020000000000000, 0x4000000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4020000000000000, 0x4000000000000000, 0xc02e000000000000 +Xx_135: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_136: .quad 0xbff0000000000000, 0x4022000000000000, 0x4000000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4022000000000000, 0x4000000000000000, 0xc031000000000000 +Xx_137: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xbff0000000000000, 0x4000000000000000 +Xx_138: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_139: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_140: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_141: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_142: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_143: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_144: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_145: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_146: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_147: .quad 0x3ff0000000000000, 
0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_148: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_149: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0x4000000000000000 +Xx_150: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_151: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_152: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_153: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_154: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_155: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_156: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_157: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 
.quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_158: .quad 0x3ff0000000000000, 0xc01c000000000000, 0xc000000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc01c000000000000, 0xc000000000000000, 0x402e000000000000 +Xx_159: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_160: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_161: .quad 0x3ff0000000000000, 0xc014000000000000, 0xc000000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc014000000000000, 0xc000000000000000, 0x4026000000000000 +Xx_162: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_163: .quad 0xc010000000000000, 0x4014000000000000, 0x4022000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xc010000000000000, 0x4014000000000000, 0x4022000000000000, 0xc026000000000000 +Xx_164: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc000000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc000000000000000, 0x4022000000000000 +Xx_165: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc000000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc000000000000000, 0x4022000000000000 +Xx_166: .quad 0xc008000000000000, 0x4010000000000000, 0x401c000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x401c000000000000, 0xc022000000000000 +Xx_167: .quad 0xc008000000000000, 0x4010000000000000, 0x401c000000000000, 0xc022000000000000 # 
gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x401c000000000000, 0xc022000000000000 +Xx_168: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 +Xx_169: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 +Xx_170: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc000000000000000, 0x401c000000000000 +Xx_171: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc000000000000000 +Xx_172: .quad 0xc000000000000000, 0x4008000000000000, 0x4014000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4014000000000000, 0xc01c000000000000 +Xx_173: .quad 0xc000000000000000, 0x4008000000000000, 0x4014000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4014000000000000, 0xc01c000000000000 +Xx_174: .quad 0x4008000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4028000000000000 +Xx_175: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_176: .quad 0xc000000000000000, 0x4014000000000000, 0x4014000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4014000000000000, 0x4014000000000000, 
0xc028000000000000 +Xx_177: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_178: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_179: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_180: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_181: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc000000000000000, 0x4014000000000000 +Xx_182: .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_183: .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_184: .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_185: .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_186: .quad 0xbff0000000000000, 0x4000000000000000, 
0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_187: .quad 0x4000000000000000, 0xc014000000000000, 0xc014000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc014000000000000, 0xc014000000000000, 0x402a000000000000 +Xx_188: .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4008000000000000, 0xc014000000000000 +Xx_189: .quad 0xc008000000000000, 0x4014000000000000, 0x4020000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4014000000000000, 0x4020000000000000, 0xc02a000000000000 +Xx_190: .quad 0x4000000000000000, 0xc008000000000000, 0xc014000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc014000000000000, 0x4020000000000000 +Xx_191: .quad 0x4000000000000000, 0xc008000000000000, 0xc014000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc014000000000000, 0x4020000000000000 +Xx_192: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_193: .quad 0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 +Xx_194: .quad 0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 +Xx_195: .quad 0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 
0xbff0000000000000, 0x4008000000000000, 0x4008000000000000, 0xc020000000000000 +Xx_196: .quad 0x4008000000000000, 0xc010000000000000, 0xc020000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc020000000000000, 0x4026000000000000 +Xx_197: .quad 0x4008000000000000, 0xc010000000000000, 0xc020000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc020000000000000, 0x4026000000000000 +Xx_198: .quad 0xbff0000000000000, 0x4010000000000000, 0x4008000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4008000000000000, 0xc026000000000000 +Xx_199: .quad 0xbff0000000000000, 0x4010000000000000, 0x4008000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4008000000000000, 0xc026000000000000 +Xx_200: .quad 0x4010000000000000, 0xc014000000000000, 0xc026000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x4010000000000000, 0xc014000000000000, 0xc026000000000000, 0x402c000000000000 +Xx_201: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_202: .quad 0xbff0000000000000, 0x4014000000000000, 0x4008000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4014000000000000, 0x4008000000000000, 0xc02c000000000000 +Xx_203: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_204: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 
+Xx_205: .quad 0xbff0000000000000, 0x401c000000000000, 0x4008000000000000, 0xc034000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x401c000000000000, 0x4008000000000000, 0xc034000000000000 +Xx_206: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_207: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_208: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_209: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_210: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_211: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_212: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_213: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x4008000000000000 +Xx_214: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_215: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_216: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_217: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_218: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_219: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_220: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_221: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_222: .quad 0x3ff0000000000000, 0xc01c000000000000, 0xc008000000000000, 0x4036000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc01c000000000000, 0xc008000000000000, 0x4036000000000000 +Xx_223: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_224: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_225: .quad 0x3ff0000000000000, 
0xc014000000000000, 0xc008000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc014000000000000, 0xc008000000000000, 0x4030000000000000 +Xx_226: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_227: .quad 0xc010000000000000, 0x4014000000000000, 0x402a000000000000, 0xc030000000000000 # gcd_base_continued_fraction:106 .quad 0xc010000000000000, 0x4014000000000000, 0x402a000000000000, 0xc030000000000000 +Xx_228: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc008000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc008000000000000, 0x402a000000000000 +Xx_229: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc008000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc008000000000000, 0x402a000000000000 +Xx_230: .quad 0xc008000000000000, 0x4010000000000000, 0x4024000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4024000000000000, 0xc02a000000000000 +Xx_231: .quad 0xc008000000000000, 0x4010000000000000, 0x4024000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4024000000000000, 0xc02a000000000000 +Xx_232: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 +Xx_233: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 +Xx_234: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 
0x3ff0000000000000, 0xc008000000000000, 0xc008000000000000, 0x4024000000000000 +Xx_235: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc008000000000000 +Xx_236: .quad 0xc000000000000000, 0x4008000000000000, 0x401c000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x401c000000000000, 0xc024000000000000 +Xx_237: .quad 0xc000000000000000, 0x4008000000000000, 0x401c000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x401c000000000000, 0xc024000000000000 +Xx_238: .quad 0x4008000000000000, 0xc014000000000000, 0xc024000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc014000000000000, 0xc024000000000000, 0x4031000000000000 +Xx_239: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_240: .quad 0xc000000000000000, 0x4014000000000000, 0x401c000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4014000000000000, 0x401c000000000000, 0xc031000000000000 +Xx_241: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_242: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_243: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_244: .quad 
0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_245: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc008000000000000, 0x401c000000000000 +Xx_246: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_247: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_248: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_249: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_250: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_251: .quad 0x4000000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4032000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc014000000000000, 0xc01c000000000000, 0x4032000000000000 +Xx_252: .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4010000000000000, 0xc01c000000000000 +Xx_253: .quad 0xc008000000000000, 0x4014000000000000, 0x4026000000000000, 0xc032000000000000 # 
gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4014000000000000, 0x4026000000000000, 0xc032000000000000 +Xx_254: .quad 0x4000000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4026000000000000 +Xx_255: .quad 0x4000000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4026000000000000 +Xx_256: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_257: .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 +Xx_258: .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 +Xx_259: .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4010000000000000, 0xc026000000000000 +Xx_260: .quad 0x4008000000000000, 0xc010000000000000, 0xc026000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc026000000000000, 0x402e000000000000 +Xx_261: .quad 0x4008000000000000, 0xc010000000000000, 0xc026000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc026000000000000, 0x402e000000000000 +Xx_262: .quad 0xbff0000000000000, 0x4010000000000000, 0x4010000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 
0x4010000000000000, 0xc02e000000000000 +Xx_263: .quad 0xbff0000000000000, 0x4010000000000000, 0x4010000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4010000000000000, 0xc02e000000000000 +Xx_264: .quad 0x4010000000000000, 0xc014000000000000, 0xc02e000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x4010000000000000, 0xc014000000000000, 0xc02e000000000000, 0x4033000000000000 +Xx_265: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_266: .quad 0xbff0000000000000, 0x4014000000000000, 0x4010000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4014000000000000, 0x4010000000000000, 0xc033000000000000 +Xx_267: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_268: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_269: .quad 0xbff0000000000000, 0x401c000000000000, 0x4010000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x401c000000000000, 0x4010000000000000, 0xc03b000000000000 +Xx_270: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_271: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_272: .quad 0x3ff0000000000000, 
0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_273: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_274: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_275: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_276: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_277: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc008000000000000, 0x4010000000000000 +Xx_278: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_279: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_280: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_281: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_282: .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_283: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_284: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_285: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc010000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc010000000000000, 0x4031000000000000 +Xx_286: .quad 0xc008000000000000, 0x4010000000000000, 0x402a000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x402a000000000000, 0xc031000000000000 +Xx_287: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc010000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc010000000000000, 0x402a000000000000 +Xx_288: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc010000000000000 +Xx_289: .quad 0xc000000000000000, 0x4008000000000000, 0x4022000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4022000000000000, 0xc02a000000000000 +Xx_290: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 +Xx_291: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 +Xx_292: 
.quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 +Xx_293: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc010000000000000, 0x4022000000000000 +Xx_294: .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 +Xx_295: .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 +Xx_296: .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 +Xx_297: .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4014000000000000, 0xc022000000000000 +Xx_298: .quad 0x4000000000000000, 0xc008000000000000, 0xc022000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc022000000000000, 0x402c000000000000 +Xx_299: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_300: .quad 0xbff0000000000000, 0x4008000000000000, 0x4014000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4014000000000000, 0xc02c000000000000 +Xx_301: .quad 0x4008000000000000, 0xc010000000000000, 0xc02c000000000000, 
0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc02c000000000000, 0x4033000000000000 +Xx_302: .quad 0xbff0000000000000, 0x4010000000000000, 0x4014000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4014000000000000, 0xc033000000000000 +Xx_303: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_304: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_305: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_306: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_307: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_308: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_309: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc010000000000000, 0x4014000000000000 +Xx_310: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc014000000000000 +Xx_311: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_312: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_313: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_314: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_315: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_316: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_317: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc014000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc014000000000000, 0x4035000000000000 +Xx_318: .quad 0xc008000000000000, 0x4010000000000000, 0x4030000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4030000000000000, 0xc035000000000000 +Xx_319: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc014000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc014000000000000, 0x4030000000000000 +Xx_320: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc014000000000000 +Xx_321: .quad 0xc000000000000000, 
0x4008000000000000, 0x4026000000000000, 0xc030000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x4026000000000000, 0xc030000000000000 +Xx_322: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 +Xx_323: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 +Xx_324: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 +Xx_325: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc014000000000000, 0x4026000000000000 +Xx_326: .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 +Xx_327: .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 +Xx_328: .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 +Xx_329: .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4018000000000000, 0xc026000000000000 +Xx_330: .quad 0x4000000000000000, 0xc008000000000000, 0xc026000000000000, 0x4031000000000000 # 
gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc026000000000000, 0x4031000000000000 +Xx_331: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_332: .quad 0xbff0000000000000, 0x4008000000000000, 0x4018000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4018000000000000, 0xc031000000000000 +Xx_333: .quad 0x4008000000000000, 0xc010000000000000, 0xc031000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc031000000000000, 0x4037000000000000 +Xx_334: .quad 0xbff0000000000000, 0x4010000000000000, 0x4018000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4018000000000000, 0xc037000000000000 +Xx_335: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_336: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_337: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_338: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_339: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc014000000000000, 0x4018000000000000 +Xx_340: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_341: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc014000000000000, 0x4018000000000000 +Xx_342: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_343: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_344: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_345: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_346: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_347: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_348: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_349: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc018000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc018000000000000, 0x4039000000000000 +Xx_350: .quad 0xc008000000000000, 
0x4010000000000000, 0x4033000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4033000000000000, 0xc039000000000000 +Xx_351: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc018000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc018000000000000, 0x4033000000000000 +Xx_352: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc018000000000000 +Xx_353: .quad 0xc000000000000000, 0x4008000000000000, 0x402a000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x402a000000000000, 0xc033000000000000 +Xx_354: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 +Xx_355: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 +Xx_356: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 +Xx_357: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc018000000000000, 0x402a000000000000 +Xx_358: .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 +Xx_359: .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 
0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 +Xx_360: .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 +Xx_361: .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x401c000000000000, 0xc02a000000000000 +Xx_362: .quad 0x4000000000000000, 0xc008000000000000, 0xc02a000000000000, 0x4034000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc02a000000000000, 0x4034000000000000 +Xx_363: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_364: .quad 0xbff0000000000000, 0x4008000000000000, 0x401c000000000000, 0xc034000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x401c000000000000, 0xc034000000000000 +Xx_365: .quad 0x4008000000000000, 0xc010000000000000, 0xc034000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc034000000000000, 0x403b000000000000 +Xx_366: .quad 0xbff0000000000000, 0x4010000000000000, 0x401c000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x401c000000000000, 0xc03b000000000000 +Xx_367: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_368: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 
+Xx_369: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_370: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_371: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_372: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_373: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc018000000000000, 0x401c000000000000 +Xx_374: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_375: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_376: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_377: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_378: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_379: .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_380: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_381: .quad 0x3ff0000000000000, 0xc010000000000000, 0xc01c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc010000000000000, 0xc01c000000000000, 0x403d000000000000 +Xx_382: .quad 0xc008000000000000, 0x4010000000000000, 0x4036000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0xc008000000000000, 0x4010000000000000, 0x4036000000000000, 0xc03d000000000000 +Xx_383: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4036000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc01c000000000000, 0x4036000000000000 +Xx_384: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc01c000000000000 +Xx_385: .quad 0xc000000000000000, 0x4008000000000000, 0x402e000000000000, 0xc036000000000000 # gcd_base_continued_fraction:106 .quad 0xc000000000000000, 0x4008000000000000, 0x402e000000000000, 0xc036000000000000 +Xx_386: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 +Xx_387: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 +Xx_388: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 
0xc01c000000000000, 0x402e000000000000 +Xx_389: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc01c000000000000, 0x402e000000000000 +Xx_390: .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 +Xx_391: .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 +Xx_392: .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 +Xx_393: .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4020000000000000, 0xc02e000000000000 +Xx_394: .quad 0x4000000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x4000000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4037000000000000 +Xx_395: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_396: .quad 0xbff0000000000000, 0x4008000000000000, 0x4020000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4020000000000000, 0xc037000000000000 +Xx_397: .quad 0x4008000000000000, 0xc010000000000000, 0xc037000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x4008000000000000, 0xc010000000000000, 0xc037000000000000, 0x403f000000000000 +Xx_398: .quad 0xbff0000000000000, 
0x4010000000000000, 0x4020000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4010000000000000, 0x4020000000000000, 0xc03f000000000000 +Xx_399: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_400: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_401: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_402: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_403: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_404: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_405: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc01c000000000000, 0x4020000000000000 +Xx_406: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 +Xx_407: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc020000000000000 +Xx_408: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 +Xx_409: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 +Xx_410: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc020000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc020000000000000, 0x4039000000000000 +Xx_411: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc020000000000000 +Xx_412: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc020000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc020000000000000, 0x4031000000000000 +Xx_413: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc020000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc020000000000000, 0x4031000000000000 +Xx_414: .quad 0xbff0000000000000, 0x4000000000000000, 0x4022000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4022000000000000, 0xc031000000000000 +Xx_415: .quad 0xbff0000000000000, 0x4000000000000000, 0x4022000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4022000000000000, 0xc031000000000000 +Xx_416: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 +Xx_417: .quad 0xbff0000000000000, 0x4008000000000000, 0x4022000000000000, 0xc03a000000000000 # gcd_base_continued_fraction:106 .quad 
0xbff0000000000000, 0x4008000000000000, 0x4022000000000000, 0xc03a000000000000 +Xx_418: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 +Xx_419: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 +Xx_420: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 +Xx_421: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc020000000000000, 0x4022000000000000 +Xx_422: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 +Xx_423: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 +Xx_424: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 +Xx_425: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 +Xx_426: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc022000000000000, 0x403c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc022000000000000, 0x403c000000000000 +Xx_427: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc022000000000000 +Xx_428: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc022000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc022000000000000, 0x4033000000000000 +Xx_429: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc022000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc022000000000000, 0x4033000000000000 +Xx_430: .quad 0xbff0000000000000, 0x4000000000000000, 0x4024000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4024000000000000, 0xc033000000000000 +Xx_431: .quad 0xbff0000000000000, 0x4000000000000000, 0x4024000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4024000000000000, 0xc033000000000000 +Xx_432: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 +Xx_433: .quad 0xbff0000000000000, 0x4008000000000000, 0x4024000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4024000000000000, 0xc03d000000000000 +Xx_434: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 +Xx_435: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 +Xx_436: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 +Xx_437: .quad 
0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc022000000000000, 0x4024000000000000 +Xx_438: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 +Xx_439: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 +Xx_440: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 +Xx_441: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 +Xx_442: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc024000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc024000000000000, 0x403f000000000000 +Xx_443: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc024000000000000 +Xx_444: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc024000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc024000000000000, 0x4035000000000000 +Xx_445: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc024000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc024000000000000, 0x4035000000000000 +Xx_446: .quad 0xbff0000000000000, 0x4000000000000000, 0x4026000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4026000000000000, 0xc035000000000000 +Xx_447: .quad 0xbff0000000000000, 
0x4000000000000000, 0x4026000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4026000000000000, 0xc035000000000000 +Xx_448: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 +Xx_449: .quad 0xbff0000000000000, 0x4008000000000000, 0x4026000000000000, 0xc040000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4026000000000000, 0xc040000000000000 +Xx_450: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 +Xx_451: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 +Xx_452: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 +Xx_453: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc024000000000000, 0x4026000000000000 +Xx_454: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 +Xx_455: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 +Xx_456: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc026000000000000 +Xx_457: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 +Xx_458: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc026000000000000, 0x4041000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc026000000000000, 0x4041000000000000 +Xx_459: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc026000000000000 +Xx_460: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc026000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc026000000000000, 0x4037000000000000 +Xx_461: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc026000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc026000000000000, 0x4037000000000000 +Xx_462: .quad 0xbff0000000000000, 0x4000000000000000, 0x4028000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4028000000000000, 0xc037000000000000 +Xx_463: .quad 0xbff0000000000000, 0x4000000000000000, 0x4028000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4028000000000000, 0xc037000000000000 +Xx_464: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 +Xx_465: .quad 0xbff0000000000000, 0x4008000000000000, 0x4028000000000000, 0xc041800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4028000000000000, 0xc041800000000000 +Xx_466: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 +Xx_467: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 +Xx_468: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 +Xx_469: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc026000000000000, 0x4028000000000000 +Xx_470: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 +Xx_471: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 +Xx_472: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 +Xx_473: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 +Xx_474: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc028000000000000, 0x4042800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc028000000000000, 0x4042800000000000 +Xx_475: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc028000000000000 +Xx_476: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc028000000000000, 0x4039000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc028000000000000, 0x4039000000000000 +Xx_477: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc028000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc028000000000000, 0x4039000000000000 +Xx_478: .quad 0xbff0000000000000, 0x4000000000000000, 0x402a000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402a000000000000, 0xc039000000000000 +Xx_479: .quad 0xbff0000000000000, 0x4000000000000000, 0x402a000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402a000000000000, 0xc039000000000000 +Xx_480: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 +Xx_481: .quad 0xbff0000000000000, 0x4008000000000000, 0x402a000000000000, 0xc043000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x402a000000000000, 0xc043000000000000 +Xx_482: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 +Xx_483: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 +Xx_484: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 +Xx_485: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc028000000000000, 0x402a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc028000000000000, 0x402a000000000000 +Xx_486: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 +Xx_487: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 +Xx_488: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 +Xx_489: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 +Xx_490: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02a000000000000, 0x4044000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02a000000000000, 0x4044000000000000 +Xx_491: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02a000000000000 +Xx_492: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02a000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02a000000000000, 0x403b000000000000 +Xx_493: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02a000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02a000000000000, 0x403b000000000000 +Xx_494: .quad 0xbff0000000000000, 0x4000000000000000, 0x402c000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402c000000000000, 0xc03b000000000000 +Xx_495: .quad 0xbff0000000000000, 0x4000000000000000, 0x402c000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402c000000000000, 
0xc03b000000000000 +Xx_496: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 +Xx_497: .quad 0xbff0000000000000, 0x4008000000000000, 0x402c000000000000, 0xc044800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x402c000000000000, 0xc044800000000000 +Xx_498: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 +Xx_499: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 +Xx_500: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 +Xx_501: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x402c000000000000 +Xx_502: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 +Xx_503: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 +Xx_504: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 +Xx_505: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc02c000000000000 +Xx_506: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02c000000000000, 0x4045800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02c000000000000, 0x4045800000000000 +Xx_507: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02c000000000000 +Xx_508: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02c000000000000, 0x403d000000000000 +Xx_509: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02c000000000000, 0x403d000000000000 +Xx_510: .quad 0xbff0000000000000, 0x4000000000000000, 0x402e000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402e000000000000, 0xc03d000000000000 +Xx_511: .quad 0xbff0000000000000, 0x4000000000000000, 0x402e000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x402e000000000000, 0xc03d000000000000 +Xx_512: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 +Xx_513: .quad 0xbff0000000000000, 0x4008000000000000, 0x402e000000000000, 0xc046000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x402e000000000000, 0xc046000000000000 +Xx_514: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 +Xx_515: .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc02c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 +Xx_516: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 +Xx_517: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02c000000000000, 0x402e000000000000 +Xx_518: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 +Xx_519: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 +Xx_520: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 +Xx_521: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 +Xx_522: .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4047000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4047000000000000 +Xx_523: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc02e000000000000 +Xx_524: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02e000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02e000000000000, 0x403f000000000000 +Xx_525: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02e000000000000, 
0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc02e000000000000, 0x403f000000000000 +Xx_526: .quad 0xbff0000000000000, 0x4000000000000000, 0x4030000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4030000000000000, 0xc03f000000000000 +Xx_527: .quad 0xbff0000000000000, 0x4000000000000000, 0x4030000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4030000000000000, 0xc03f000000000000 +Xx_528: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 +Xx_529: .quad 0xbff0000000000000, 0x4008000000000000, 0x4030000000000000, 0xc047800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4008000000000000, 0x4030000000000000, 0xc047800000000000 +Xx_530: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 +Xx_531: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 +Xx_532: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 +Xx_533: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc02e000000000000, 0x4030000000000000 +Xx_534: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc030000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc030000000000000 +Xx_535: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc030000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc030000000000000 +Xx_536: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc030000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc030000000000000 +Xx_537: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc030000000000000, 0x4040800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc030000000000000, 0x4040800000000000 +Xx_538: .quad 0xbff0000000000000, 0x4000000000000000, 0x4031000000000000, 0xc040800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4031000000000000, 0xc040800000000000 +Xx_539: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 +Xx_540: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 +Xx_541: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc030000000000000, 0x4031000000000000 +Xx_542: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc031000000000000 +Xx_543: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc031000000000000 +Xx_544: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc031000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc031000000000000 +Xx_545: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc031000000000000, 0x4041800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc031000000000000, 0x4041800000000000 +Xx_546: .quad 0xbff0000000000000, 0x4000000000000000, 0x4032000000000000, 0xc041800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4032000000000000, 0xc041800000000000 +Xx_547: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 +Xx_548: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 +Xx_549: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc031000000000000, 0x4032000000000000 +Xx_550: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 +Xx_551: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 +Xx_552: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc032000000000000 +Xx_553: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc032000000000000, 0x4042800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc032000000000000, 0x4042800000000000 +Xx_554: .quad 0xbff0000000000000, 0x4000000000000000, 0x4033000000000000, 0xc042800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 
0x4000000000000000, 0x4033000000000000, 0xc042800000000000 +Xx_555: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 +Xx_556: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 +Xx_557: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc032000000000000, 0x4033000000000000 +Xx_558: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 +Xx_559: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 +Xx_560: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc033000000000000 +Xx_561: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc033000000000000, 0x4043800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc033000000000000, 0x4043800000000000 +Xx_562: .quad 0xbff0000000000000, 0x4000000000000000, 0x4034000000000000, 0xc043800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4034000000000000, 0xc043800000000000 +Xx_563: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 +Xx_564: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 +Xx_565: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc033000000000000, 0x4034000000000000 +Xx_566: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 +Xx_567: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 +Xx_568: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc034000000000000 +Xx_569: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc034000000000000, 0x4044800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc034000000000000, 0x4044800000000000 +Xx_570: .quad 0xbff0000000000000, 0x4000000000000000, 0x4035000000000000, 0xc044800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4035000000000000, 0xc044800000000000 +Xx_571: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 +Xx_572: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 +Xx_573: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc034000000000000, 0x4035000000000000 +Xx_574: .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc035000000000000 +Xx_575: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc035000000000000 +Xx_576: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc035000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc035000000000000 +Xx_577: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc035000000000000, 0x4045800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc035000000000000, 0x4045800000000000 +Xx_578: .quad 0xbff0000000000000, 0x4000000000000000, 0x4036000000000000, 0xc045800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4036000000000000, 0xc045800000000000 +Xx_579: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 +Xx_580: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 +Xx_581: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc035000000000000, 0x4036000000000000 +Xx_582: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc036000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc036000000000000 +Xx_583: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc036000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc036000000000000 +Xx_584: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc036000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc036000000000000 +Xx_585: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc036000000000000, 0x4046800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc036000000000000, 0x4046800000000000 +Xx_586: .quad 0xbff0000000000000, 0x4000000000000000, 0x4037000000000000, 0xc046800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4037000000000000, 0xc046800000000000 +Xx_587: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 +Xx_588: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 +Xx_589: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc036000000000000, 0x4037000000000000 +Xx_590: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 +Xx_591: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 +Xx_592: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc037000000000000 +Xx_593: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc037000000000000, 0x4047800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc037000000000000, 0x4047800000000000 +Xx_594: .quad 0xbff0000000000000, 
0x4000000000000000, 0x4038000000000000, 0xc047800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4038000000000000, 0xc047800000000000 +Xx_595: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 +Xx_596: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 +Xx_597: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc037000000000000, 0x4038000000000000 +Xx_598: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 +Xx_599: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 +Xx_600: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc038000000000000 +Xx_601: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc038000000000000, 0x4048800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc038000000000000, 0x4048800000000000 +Xx_602: .quad 0xbff0000000000000, 0x4000000000000000, 0x4039000000000000, 0xc048800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4039000000000000, 0xc048800000000000 +Xx_603: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 
0x4039000000000000 +Xx_604: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 0x4039000000000000 +Xx_605: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 0x4039000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc038000000000000, 0x4039000000000000 +Xx_606: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 +Xx_607: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 +Xx_608: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc039000000000000 +Xx_609: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc039000000000000, 0x4049800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc039000000000000, 0x4049800000000000 +Xx_610: .quad 0xbff0000000000000, 0x4000000000000000, 0x403a000000000000, 0xc049800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403a000000000000, 0xc049800000000000 +Xx_611: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 +Xx_612: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 +Xx_613: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 
0xbff0000000000000, 0xc039000000000000, 0x403a000000000000 +Xx_614: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 +Xx_615: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 +Xx_616: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03a000000000000 +Xx_617: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03a000000000000, 0x404a800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03a000000000000, 0x404a800000000000 +Xx_618: .quad 0xbff0000000000000, 0x4000000000000000, 0x403b000000000000, 0xc04a800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403b000000000000, 0xc04a800000000000 +Xx_619: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 +Xx_620: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 +Xx_621: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03a000000000000, 0x403b000000000000 +Xx_622: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03b000000000000 +Xx_623: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc03b000000000000 +Xx_624: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03b000000000000 +Xx_625: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03b000000000000, 0x404b800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03b000000000000, 0x404b800000000000 +Xx_626: .quad 0xbff0000000000000, 0x4000000000000000, 0x403c000000000000, 0xc04b800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403c000000000000, 0xc04b800000000000 +Xx_627: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 +Xx_628: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 +Xx_629: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03b000000000000, 0x403c000000000000 +Xx_630: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 +Xx_631: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 +Xx_632: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03c000000000000 +Xx_633: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03c000000000000, 0x404c800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 
0xc000000000000000, 0xc03c000000000000, 0x404c800000000000 +Xx_634: .quad 0xbff0000000000000, 0x4000000000000000, 0x403d000000000000, 0xc04c800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403d000000000000, 0xc04c800000000000 +Xx_635: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 +Xx_636: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 +Xx_637: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03c000000000000, 0x403d000000000000 +Xx_638: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 +Xx_639: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 +Xx_640: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03d000000000000 +Xx_641: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03d000000000000, 0x404d800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03d000000000000, 0x404d800000000000 +Xx_642: .quad 0xbff0000000000000, 0x4000000000000000, 0x403e000000000000, 0xc04d800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403e000000000000, 0xc04d800000000000 +Xx_643: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 +Xx_644: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 +Xx_645: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03d000000000000, 0x403e000000000000 +Xx_646: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 +Xx_647: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 +Xx_648: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03e000000000000 +Xx_649: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03e000000000000, 0x404e800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03e000000000000, 0x404e800000000000 +Xx_650: .quad 0xbff0000000000000, 0x4000000000000000, 0x403f000000000000, 0xc04e800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x403f000000000000, 0xc04e800000000000 +Xx_651: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03e000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03e000000000000, 0x403f000000000000 +Xx_652: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03e000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03e000000000000, 0x403f000000000000 +Xx_653: .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc03e000000000000, 0x403f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03e000000000000, 0x403f000000000000 +Xx_654: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 +Xx_655: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 +Xx_656: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc03f000000000000 +Xx_657: .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03f000000000000, 0x404f800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xc000000000000000, 0xc03f000000000000, 0x404f800000000000 +Xx_658: .quad 0xbff0000000000000, 0x4000000000000000, 0x4040000000000000, 0xc04f800000000000 # gcd_base_continued_fraction:106 .quad 0xbff0000000000000, 0x4000000000000000, 0x4040000000000000, 0xc04f800000000000 +Xx_659: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 +Xx_660: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 +Xx_661: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc03f000000000000, 0x4040000000000000 +Xx_662: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040000000000000 +Xx_663: .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc040000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040000000000000 +Xx_664: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040000000000000, 0x4040800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040000000000000, 0x4040800000000000 +Xx_665: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040000000000000, 0x4040800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040000000000000, 0x4040800000000000 +Xx_666: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040800000000000 +Xx_667: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc040800000000000 +Xx_668: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040800000000000, 0x4041000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040800000000000, 0x4041000000000000 +Xx_669: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040800000000000, 0x4041000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc040800000000000, 0x4041000000000000 +Xx_670: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041000000000000 +Xx_671: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041000000000000 +Xx_672: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041000000000000, 0x4041800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041000000000000, 0x4041800000000000 +Xx_673: .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc041000000000000, 0x4041800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041000000000000, 0x4041800000000000 +Xx_674: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041800000000000 +Xx_675: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc041800000000000 +Xx_676: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041800000000000, 0x4042000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041800000000000, 0x4042000000000000 +Xx_677: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041800000000000, 0x4042000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc041800000000000, 0x4042000000000000 +Xx_678: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042000000000000 +Xx_679: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042000000000000 +Xx_680: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042000000000000, 0x4042800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042000000000000, 0x4042800000000000 +Xx_681: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042000000000000, 0x4042800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042000000000000, 0x4042800000000000 +Xx_682: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042800000000000 +Xx_683: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc042800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc042800000000000 +Xx_684: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042800000000000, 0x4043000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042800000000000, 0x4043000000000000 +Xx_685: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042800000000000, 0x4043000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc042800000000000, 0x4043000000000000 +Xx_686: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043000000000000 +Xx_687: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043000000000000 +Xx_688: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043000000000000, 0x4043800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043000000000000, 0x4043800000000000 +Xx_689: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043000000000000, 0x4043800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043000000000000, 0x4043800000000000 +Xx_690: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043800000000000 +Xx_691: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc043800000000000 +Xx_692: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043800000000000, 0x4044000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043800000000000, 0x4044000000000000 +Xx_693: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043800000000000, 
0x4044000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc043800000000000, 0x4044000000000000 +Xx_694: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044000000000000 +Xx_695: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044000000000000 +Xx_696: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044000000000000, 0x4044800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044000000000000, 0x4044800000000000 +Xx_697: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044000000000000, 0x4044800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044000000000000, 0x4044800000000000 +Xx_698: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044800000000000 +Xx_699: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc044800000000000 +Xx_700: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044800000000000, 0x4045000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044800000000000, 0x4045000000000000 +Xx_701: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044800000000000, 0x4045000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc044800000000000, 0x4045000000000000 +Xx_702: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045000000000000 +Xx_703: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045000000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045000000000000 +Xx_704: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045000000000000, 0x4045800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045000000000000, 0x4045800000000000 +Xx_705: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045000000000000, 0x4045800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045000000000000, 0x4045800000000000 +Xx_706: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045800000000000 +Xx_707: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc045800000000000 +Xx_708: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045800000000000, 0x4046000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045800000000000, 0x4046000000000000 +Xx_709: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045800000000000, 0x4046000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc045800000000000, 0x4046000000000000 +Xx_710: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046000000000000 +Xx_711: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046000000000000 +Xx_712: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046000000000000, 0x4046800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046000000000000, 0x4046800000000000 +Xx_713: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046000000000000, 0x4046800000000000 # 
gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046000000000000, 0x4046800000000000 +Xx_714: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046800000000000 +Xx_715: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc046800000000000 +Xx_716: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046800000000000, 0x4047000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046800000000000, 0x4047000000000000 +Xx_717: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046800000000000, 0x4047000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc046800000000000, 0x4047000000000000 +Xx_718: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047000000000000 +Xx_719: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047000000000000 +Xx_720: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047000000000000, 0x4047800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047000000000000, 0x4047800000000000 +Xx_721: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047000000000000, 0x4047800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047000000000000, 0x4047800000000000 +Xx_722: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047800000000000 +Xx_723: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047800000000000 # gcd_base_continued_fraction:106 
.quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc047800000000000 +Xx_724: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047800000000000, 0x4048000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047800000000000, 0x4048000000000000 +Xx_725: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047800000000000, 0x4048000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc047800000000000, 0x4048000000000000 +Xx_726: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048000000000000 +Xx_727: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048000000000000 +Xx_728: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048000000000000, 0x4048800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048000000000000, 0x4048800000000000 +Xx_729: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048000000000000, 0x4048800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048000000000000, 0x4048800000000000 +Xx_730: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048800000000000 +Xx_731: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc048800000000000 +Xx_732: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048800000000000, 0x4049000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048800000000000, 0x4049000000000000 +Xx_733: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048800000000000, 0x4049000000000000 # gcd_base_continued_fraction:106 
.quad 0x3ff0000000000000, 0xbff0000000000000, 0xc048800000000000, 0x4049000000000000 +Xx_734: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049000000000000 +Xx_735: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049000000000000 +Xx_736: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049000000000000, 0x4049800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049000000000000, 0x4049800000000000 +Xx_737: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049000000000000, 0x4049800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049000000000000, 0x4049800000000000 +Xx_738: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049800000000000 +Xx_739: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc049800000000000 +Xx_740: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049800000000000, 0x404a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049800000000000, 0x404a000000000000 +Xx_741: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049800000000000, 0x404a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc049800000000000, 0x404a000000000000 +Xx_742: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a000000000000 +Xx_743: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc04a000000000000 +Xx_744: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a000000000000, 0x404a800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a000000000000, 0x404a800000000000 +Xx_745: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a000000000000, 0x404a800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a000000000000, 0x404a800000000000 +Xx_746: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a800000000000 +Xx_747: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04a800000000000 +Xx_748: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a800000000000, 0x404b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a800000000000, 0x404b000000000000 +Xx_749: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a800000000000, 0x404b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04a800000000000, 0x404b000000000000 +Xx_750: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b000000000000 +Xx_751: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b000000000000 +Xx_752: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b000000000000, 0x404b800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b000000000000, 0x404b800000000000 +Xx_753: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b000000000000, 0x404b800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 
0xbff0000000000000, 0xc04b000000000000, 0x404b800000000000 +Xx_754: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b800000000000 +Xx_755: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04b800000000000 +Xx_756: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b800000000000, 0x404c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b800000000000, 0x404c000000000000 +Xx_757: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b800000000000, 0x404c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04b800000000000, 0x404c000000000000 +Xx_758: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c000000000000 +Xx_759: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c000000000000 +Xx_760: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c000000000000, 0x404c800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c000000000000, 0x404c800000000000 +Xx_761: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c000000000000, 0x404c800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c000000000000, 0x404c800000000000 +Xx_762: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c800000000000 +Xx_763: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04c800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc04c800000000000 +Xx_764: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c800000000000, 0x404d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c800000000000, 0x404d000000000000 +Xx_765: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c800000000000, 0x404d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04c800000000000, 0x404d000000000000 +Xx_766: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d000000000000 +Xx_767: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d000000000000 +Xx_768: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d000000000000, 0x404d800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d000000000000, 0x404d800000000000 +Xx_769: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d000000000000, 0x404d800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d000000000000, 0x404d800000000000 +Xx_770: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d800000000000 +Xx_771: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04d800000000000 +Xx_772: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d800000000000, 0x404e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d800000000000, 0x404e000000000000 +Xx_773: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04d800000000000, 0x404e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 
0xc04d800000000000, 0x404e000000000000 +Xx_774: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e000000000000 +Xx_775: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e000000000000 +Xx_776: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e000000000000, 0x404e800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e000000000000, 0x404e800000000000 +Xx_777: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e000000000000, 0x404e800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e000000000000, 0x404e800000000000 +Xx_778: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e800000000000 +Xx_779: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04e800000000000 +Xx_780: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e800000000000, 0x404f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e800000000000, 0x404f000000000000 +Xx_781: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e800000000000, 0x404f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04e800000000000, 0x404f000000000000 +Xx_782: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f000000000000 +Xx_783: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f000000000000 +Xx_784: 
.quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f000000000000, 0x404f800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f000000000000, 0x404f800000000000 +Xx_785: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f000000000000, 0x404f800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f000000000000, 0x404f800000000000 +Xx_786: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f800000000000 +Xx_787: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc04f800000000000 +Xx_788: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f800000000000, 0x4050000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f800000000000, 0x4050000000000000 +Xx_789: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f800000000000, 0x4050000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc04f800000000000, 0x4050000000000000 +Xx_790: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050000000000000 +Xx_791: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050000000000000, 0x4050400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050000000000000, 0x4050400000000000 +Xx_792: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050400000000000 +Xx_793: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050400000000000, 0x4050800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050400000000000, 
0x4050800000000000 +Xx_794: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050800000000000 +Xx_795: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050800000000000, 0x4050c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050800000000000, 0x4050c00000000000 +Xx_796: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc050c00000000000 +Xx_797: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050c00000000000, 0x4051000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc050c00000000000, 0x4051000000000000 +Xx_798: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051000000000000 +Xx_799: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051000000000000, 0x4051400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051000000000000, 0x4051400000000000 +Xx_800: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051400000000000 +Xx_801: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051400000000000, 0x4051800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051400000000000, 0x4051800000000000 +Xx_802: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051800000000000 +Xx_803: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051800000000000, 0x4051c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051800000000000, 0x4051c00000000000 
+Xx_804: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc051c00000000000 +Xx_805: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051c00000000000, 0x4052000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc051c00000000000, 0x4052000000000000 +Xx_806: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052000000000000 +Xx_807: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052000000000000, 0x4052400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052000000000000, 0x4052400000000000 +Xx_808: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052400000000000 +Xx_809: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052400000000000, 0x4052800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052400000000000, 0x4052800000000000 +Xx_810: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052800000000000 +Xx_811: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052800000000000, 0x4052c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052800000000000, 0x4052c00000000000 +Xx_812: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc052c00000000000 +Xx_813: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052c00000000000, 0x4053000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc052c00000000000, 0x4053000000000000 +Xx_814: .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc053000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053000000000000 +Xx_815: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053000000000000, 0x4053400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053000000000000, 0x4053400000000000 +Xx_816: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053400000000000 +Xx_817: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053400000000000, 0x4053800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053400000000000, 0x4053800000000000 +Xx_818: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053800000000000 +Xx_819: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053800000000000, 0x4053c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053800000000000, 0x4053c00000000000 +Xx_820: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc053c00000000000 +Xx_821: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053c00000000000, 0x4054000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc053c00000000000, 0x4054000000000000 +Xx_822: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054000000000000 +Xx_823: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054000000000000, 0x4054400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054000000000000, 0x4054400000000000 +Xx_824: .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc054400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054400000000000 +Xx_825: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054400000000000, 0x4054800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054400000000000, 0x4054800000000000 +Xx_826: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054800000000000 +Xx_827: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054800000000000, 0x4054c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054800000000000, 0x4054c00000000000 +Xx_828: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc054c00000000000 +Xx_829: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054c00000000000, 0x4055000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc054c00000000000, 0x4055000000000000 +Xx_830: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055000000000000 +Xx_831: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055000000000000, 0x4055400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055000000000000, 0x4055400000000000 +Xx_832: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055400000000000 +Xx_833: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055400000000000, 0x4055800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055400000000000, 0x4055800000000000 +Xx_834: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc055800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055800000000000 +Xx_835: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055800000000000, 0x4055c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055800000000000, 0x4055c00000000000 +Xx_836: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc055c00000000000 +Xx_837: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055c00000000000, 0x4056000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc055c00000000000, 0x4056000000000000 +Xx_838: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056000000000000 +Xx_839: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056000000000000, 0x4056400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056000000000000, 0x4056400000000000 +Xx_840: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056400000000000 +Xx_841: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056400000000000, 0x4056800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056400000000000, 0x4056800000000000 +Xx_842: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056800000000000 +Xx_843: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056800000000000, 0x4056c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056800000000000, 0x4056c00000000000 +Xx_844: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056c00000000000 
# gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc056c00000000000 +Xx_845: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056c00000000000, 0x4057000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc056c00000000000, 0x4057000000000000 +Xx_846: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057000000000000 +Xx_847: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057000000000000, 0x4057400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057000000000000, 0x4057400000000000 +Xx_848: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057400000000000 +Xx_849: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057400000000000, 0x4057800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057400000000000, 0x4057800000000000 +Xx_850: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057800000000000 +Xx_851: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057800000000000, 0x4057c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057800000000000, 0x4057c00000000000 +Xx_852: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc057c00000000000 +Xx_853: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057c00000000000, 0x4058000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc057c00000000000, 0x4058000000000000 +Xx_854: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058000000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058000000000000 +Xx_855: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058000000000000, 0x4058400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058000000000000, 0x4058400000000000 +Xx_856: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058400000000000 +Xx_857: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058400000000000, 0x4058800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058400000000000, 0x4058800000000000 +Xx_858: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058800000000000 +Xx_859: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058800000000000, 0x4058c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058800000000000, 0x4058c00000000000 +Xx_860: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc058c00000000000 +Xx_861: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058c00000000000, 0x4059000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc058c00000000000, 0x4059000000000000 +Xx_862: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059000000000000 +Xx_863: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059000000000000, 0x4059400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059000000000000, 0x4059400000000000 +Xx_864: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059400000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059400000000000 +Xx_865: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059400000000000, 0x4059800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059400000000000, 0x4059800000000000 +Xx_866: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059800000000000 +Xx_867: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059800000000000, 0x4059c00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059800000000000, 0x4059c00000000000 +Xx_868: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc059c00000000000 +Xx_869: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059c00000000000, 0x405a000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc059c00000000000, 0x405a000000000000 +Xx_870: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a000000000000 +Xx_871: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a000000000000, 0x405a400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a000000000000, 0x405a400000000000 +Xx_872: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a400000000000 +Xx_873: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a400000000000, 0x405a800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a400000000000, 0x405a800000000000 +Xx_874: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a800000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05a800000000000 +Xx_875: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a800000000000, 0x405ac00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05a800000000000, 0x405ac00000000000 +Xx_876: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05ac00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05ac00000000000 +Xx_877: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05ac00000000000, 0x405b000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05ac00000000000, 0x405b000000000000 +Xx_878: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b000000000000 +Xx_879: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b000000000000, 0x405b400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b000000000000, 0x405b400000000000 +Xx_880: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b400000000000 +Xx_881: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b400000000000, 0x405b800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b400000000000, 0x405b800000000000 +Xx_882: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05b800000000000 +Xx_883: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b800000000000, 0x405bc00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05b800000000000, 0x405bc00000000000 +Xx_884: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05bc00000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05bc00000000000 +Xx_885: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05bc00000000000, 0x405c000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05bc00000000000, 0x405c000000000000 +Xx_886: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c000000000000 +Xx_887: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c000000000000, 0x405c400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c000000000000, 0x405c400000000000 +Xx_888: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c400000000000 +Xx_889: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c400000000000, 0x405c800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c400000000000, 0x405c800000000000 +Xx_890: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05c800000000000 +Xx_891: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c800000000000, 0x405cc00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05c800000000000, 0x405cc00000000000 +Xx_892: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05cc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05cc00000000000 +Xx_893: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05cc00000000000, 0x405d000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05cc00000000000, 0x405d000000000000 +Xx_894: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d000000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d000000000000 +Xx_895: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d000000000000, 0x405d400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d000000000000, 0x405d400000000000 +Xx_896: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d400000000000 +Xx_897: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d400000000000, 0x405d800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d400000000000, 0x405d800000000000 +Xx_898: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05d800000000000 +Xx_899: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d800000000000, 0x405dc00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05d800000000000, 0x405dc00000000000 +Xx_900: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05dc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05dc00000000000 +Xx_901: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05dc00000000000, 0x405e000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05dc00000000000, 0x405e000000000000 +Xx_902: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e000000000000 +Xx_903: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e000000000000, 0x405e400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e000000000000, 0x405e400000000000 +Xx_904: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e400000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e400000000000 +Xx_905: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e400000000000, 0x405e800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e400000000000, 0x405e800000000000 +Xx_906: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05e800000000000 +Xx_907: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e800000000000, 0x405ec00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05e800000000000, 0x405ec00000000000 +Xx_908: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05ec00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05ec00000000000 +Xx_909: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05ec00000000000, 0x405f000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05ec00000000000, 0x405f000000000000 +Xx_910: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f000000000000 +Xx_911: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f000000000000, 0x405f400000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f000000000000, 0x405f400000000000 +Xx_912: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f400000000000 +Xx_913: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f400000000000, 0x405f800000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f400000000000, 0x405f800000000000 +Xx_914: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f800000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05f800000000000 +Xx_915: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f800000000000, 0x405fc00000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05f800000000000, 0x405fc00000000000 +Xx_916: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05fc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc05fc00000000000 +Xx_917: .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05fc00000000000, 0x4060000000000000 # gcd_base_continued_fraction:106 .quad 0x3ff0000000000000, 0xbff0000000000000, 0xc05fc00000000000, 0x4060000000000000 +Xx_918: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060000000000000 +Xx_919: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060200000000000 +Xx_920: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060400000000000 +Xx_921: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060600000000000 +Xx_922: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060800000000000 +Xx_923: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060a00000000000 +Xx_924: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060c00000000000 
+Xx_925: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc060e00000000000 +Xx_926: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061000000000000 +Xx_927: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061200000000000 +Xx_928: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061400000000000 +Xx_929: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061600000000000 +Xx_930: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061800000000000 +Xx_931: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061a00000000000 +Xx_932: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061c00000000000 +Xx_933: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc061e00000000000 +Xx_934: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062000000000000 +Xx_935: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc062200000000000 +Xx_936: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062400000000000 +Xx_937: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062600000000000 +Xx_938: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062800000000000 +Xx_939: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062a00000000000 +Xx_940: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062c00000000000 +Xx_941: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc062e00000000000 +Xx_942: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063000000000000 +Xx_943: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063200000000000 +Xx_944: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063400000000000 +Xx_945: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063600000000000 +Xx_946: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc063800000000000 +Xx_947: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063a00000000000 +Xx_948: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063c00000000000 +Xx_949: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc063e00000000000 +Xx_950: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064000000000000 +Xx_951: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064200000000000 +Xx_952: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064400000000000 +Xx_953: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064600000000000 +Xx_954: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064800000000000 +Xx_955: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064a00000000000 +Xx_956: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064c00000000000 +Xx_957: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc064e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc064e00000000000 +Xx_958: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065000000000000 +Xx_959: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065200000000000 +Xx_960: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065400000000000 +Xx_961: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065600000000000 +Xx_962: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065800000000000 +Xx_963: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065a00000000000 +Xx_964: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065c00000000000 +Xx_965: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc065e00000000000 +Xx_966: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066000000000000 +Xx_967: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066200000000000 +Xx_968: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066400000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066400000000000 +Xx_969: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066600000000000 +Xx_970: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066800000000000 +Xx_971: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066a00000000000 +Xx_972: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066c00000000000 +Xx_973: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc066e00000000000 +Xx_974: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067000000000000 +Xx_975: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067200000000000 +Xx_976: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067400000000000 +Xx_977: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067600000000000 +Xx_978: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067800000000000 +Xx_979: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc067a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067a00000000000 +Xx_980: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067c00000000000 +Xx_981: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc067e00000000000 +Xx_982: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068000000000000 +Xx_983: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068200000000000 +Xx_984: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068400000000000 +Xx_985: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068600000000000 +Xx_986: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068800000000000 +Xx_987: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068a00000000000 +Xx_988: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068c00000000000 +Xx_989: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc068e00000000000 +Xx_990: .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc069000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069000000000000 +Xx_991: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069200000000000 +Xx_992: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069400000000000 +Xx_993: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069600000000000 +Xx_994: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069800000000000 +Xx_995: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069a00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069a00000000000 +Xx_996: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069c00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069c00000000000 +Xx_997: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069e00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc069e00000000000 +Xx_998: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a000000000000 +Xx_999: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a200000000000 +Xx_1000: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a400000000000 +Xx_1001: .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc06a600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a600000000000 +Xx_1002: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06a800000000000 +Xx_1003: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06aa00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06aa00000000000 +Xx_1004: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ac00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ac00000000000 +Xx_1005: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ae00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ae00000000000 +Xx_1006: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b000000000000 +Xx_1007: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b200000000000 +Xx_1008: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b400000000000 +Xx_1009: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b600000000000 +Xx_1010: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06b800000000000 +Xx_1011: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ba00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 
0xc06ba00000000000 +Xx_1012: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06bc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06bc00000000000 +Xx_1013: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06be00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06be00000000000 +Xx_1014: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c000000000000 +Xx_1015: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c200000000000 +Xx_1016: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c400000000000 +Xx_1017: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c600000000000 +Xx_1018: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06c800000000000 +Xx_1019: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ca00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ca00000000000 +Xx_1020: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06cc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06cc00000000000 +Xx_1021: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ce00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ce00000000000 +Xx_1022: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 
0x3ff0000000000000, 0x3ff0000000000000, 0xc06d000000000000 +Xx_1023: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d200000000000 +Xx_1024: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d400000000000 +Xx_1025: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d600000000000 +Xx_1026: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06d800000000000 +Xx_1027: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06da00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06da00000000000 +Xx_1028: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06dc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06dc00000000000 +Xx_1029: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06de00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06de00000000000 +Xx_1030: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e000000000000 +Xx_1031: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e200000000000 +Xx_1032: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e400000000000 +Xx_1033: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e600000000000 # 
gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e600000000000 +Xx_1034: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06e800000000000 +Xx_1035: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ea00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ea00000000000 +Xx_1036: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ec00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ec00000000000 +Xx_1037: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ee00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06ee00000000000 +Xx_1038: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f000000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f000000000000 +Xx_1039: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f200000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f200000000000 +Xx_1040: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f400000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f400000000000 +Xx_1041: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f600000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f600000000000 +Xx_1042: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f800000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06f800000000000 +Xx_1043: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06fa00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06fa00000000000 +Xx_1044: .quad 0x0, 0x3ff0000000000000, 
0x3ff0000000000000, 0xc06fc00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06fc00000000000 +Xx_1045: .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06fe00000000000 # gcd_base_continued_fraction:106 .quad 0x0, 0x3ff0000000000000, 0x3ff0000000000000, 0xc06fe00000000000 +Xx_1046: .text # gcd_base_continued_fraction:110 .text +Xx_1047: .text # gcd_base_continued_fraction:109 .text +Xx_1048: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1049: _label_3: # gcd_base_continued_fraction:114 _label_3: +Xx_1050: .quad 0x1fffffffffff # gcd_base_continued_fraction:115 .quad 0x1fffffffffff +Xx_1051: .quad 0x1fffffffffff # gcd_base_continued_fraction:116 .quad 0x1fffffffffff +Xx_1052: .text # gcd_base_continued_fraction:117 .text +Xx_1053: MOV R12, [RIP+_label_3] # gcd_base_continued_fraction:141 MOV `c_table_delta_minus_1, [RIP+_label_3] +Xx_1054: MOV R11, 0x5 # gcd_base_continued_fraction:146 MOV `loop_counter, 0x5 +Xx_1055: .text # gcd_base_continued_fraction:109 .text +Xx_1056: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1057: _label_6: # gcd_base_continued_fraction:114 _label_6: +Xx_1058: .quad 0x3ff0000000000000 # gcd_base_continued_fraction:115 .quad 0x3ff0000000000000 +Xx_1059: .quad 0x0 # gcd_base_continued_fraction:116 .quad 0x0 +Xx_1060: .text # gcd_base_continued_fraction:117 .text +Xx_1061: MOVAPD XMM1, [RIP+_label_6] # gcd_base_continued_fraction:148 MOVAPD `u, [RIP+_label_6] +Xx_1062: .text # gcd_base_continued_fraction:109 .text +Xx_1063: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1064: _label_7: # gcd_base_continued_fraction:114 _label_7: +Xx_1065: .quad 0x0 # gcd_base_continued_fraction:115 .quad 0x0 +Xx_1066: .quad 0x3ff0000000000000 # gcd_base_continued_fraction:116 .quad 0x3ff0000000000000 +Xx_1067: .text # gcd_base_continued_fraction:117 .text +Xx_1068: MOVAPD XMM2, [RIP+_label_7] # gcd_base_continued_fraction:149 MOVAPD `v, [RIP+_label_7] +Xx_1069: .text 
# gcd_base_continued_fraction:109 .text +Xx_1070: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1071: _label_8: # gcd_base_continued_fraction:114 _label_8: +Xx_1072: .quad 0x433fffffffffffff # gcd_base_continued_fraction:115 .quad 0x433fffffffffffff +Xx_1073: .quad 0x433fffffffffffff # gcd_base_continued_fraction:116 .quad 0x433fffffffffffff +Xx_1074: .text # gcd_base_continued_fraction:117 .text +Xx_1075: MOVAPD XMM14, [RIP+_label_8] # gcd_base_continued_fraction:150 MOVAPD `range_check_range, [RIP+_label_8] +Xx_1076: .text # gcd_base_continued_fraction:109 .text +Xx_1077: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1078: _label_9: # gcd_base_continued_fraction:114 _label_9: +Xx_1079: .quad 0x7fffffffffffffff # gcd_base_continued_fraction:115 .quad 0x7fffffffffffffff +Xx_1080: .quad 0x7fffffffffffffff # gcd_base_continued_fraction:116 .quad 0x7fffffffffffffff +Xx_1081: .text # gcd_base_continued_fraction:117 .text +Xx_1082: MOVAPD XMM15, [RIP+_label_9] # gcd_base_continued_fraction:151 MOVAPD `double_abs_mask, [RIP+_label_9] +Xx_1083: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:154 MOVAPD `tmp, `ab +Xx_1084: SHUFPD XMM9, XMM9, 3 # gcd_base_continued_fraction:155 SHUFPD `tmp, `tmp, 3 +Xx_1085: MOVAPD XMM13, XMM0 # gcd_base_continued_fraction:156 MOVAPD `q, `ab +Xx_1086: DIVSD XMM13, XMM9 # gcd_base_continued_fraction:157 DIVSD `q, `tmp +Xx_1087: _label_5: # gcd_base_continued_fraction:160 _label_5: +Xx_1088: #gcd_base loop start # gcd_base_continued_fraction:166 #gcd_base loop start +Xx_1089: MOVQ RBX, XMM13 # gcd_base_continued_fraction:169 MOVQ `q_scalar, `q +Xx_1090: MOV RBP, RBX # gcd_base_continued_fraction:170 MOV `q_scalar_2, `q_scalar +Xx_1091: MOV R10, RBX # gcd_base_continued_fraction:171 MOV `q_scalar_3, `q_scalar +Xx_1092: SHR RBX, 0x28 # gcd_base_continued_fraction:175 SHR `q_scalar, 0x28 +Xx_1093: AND RBX, -32 # gcd_base_continued_fraction:176 AND `q_scalar, -32 +Xx_1094: SUB RBX, 0x3ff000 # 
gcd_base_continued_fraction:181 SUB `q_scalar, 0x3ff000 +Xx_1095: JB _label_10 # gcd_base_continued_fraction:182 JB _label_10 +Xx_1096: CMP RBX, 0x8000 # gcd_base_continued_fraction:183 CMP `q_scalar, 0x8000 +Xx_1097: JAE _label_10 # gcd_base_continued_fraction:184 JAE _label_10 +Xx_1098: LEA RSI,[RIP+gcd_base_table] # gcd_base_continued_fraction:189 LEA RSI,[RIP+gcd_base_table] +Xx_1099: MOVAPD XMM5, [RBX+RSI] # gcd_base_continued_fraction:190 MOVAPD `m_0, [`q_scalar+RSI] +Xx_1100: MOVAPD XMM6, [16+RBX+RSI] # gcd_base_continued_fraction:191 MOVAPD `m_1, [16+`q_scalar+RSI] +Xx_1101: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:199 MOVAPD `tmp, `ab +Xx_1102: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:200 CMPLEPD `tmp, `ab_threshold +Xx_1103: PTEST XMM9, XMM9 # gcd_base_continued_fraction:201 PTEST `tmp, `tmp +Xx_1104: JNZ _label_4 # gcd_base_continued_fraction:202 JNZ _label_4 +Xx_1105: AND RBP, R12 # gcd_base_continued_fraction:205 AND `q_scalar_2, `c_table_delta_minus_1 +Xx_1106: JZ _label_10 # gcd_base_continued_fraction:206 JZ _label_10 +Xx_1107: CMP RBP, R12 # gcd_base_continued_fraction:207 CMP `q_scalar_2, `c_table_delta_minus_1 +Xx_1108: JE _label_10 # gcd_base_continued_fraction:208 JE _label_10 +Xx_1109: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1110: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1111: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1112: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1113: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1114: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1115: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1116: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1117: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1118: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1119: MULPD XMM10, XMM6 # 
dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1120: ADDPD XMM7, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1121: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1122: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1123: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1124: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1125: MOVAPD XMM8, XMM7 # gcd_base_continued_fraction:226 MOVAPD `new_ab_1, `new_ab +Xx_1126: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_1127: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_1128: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_1129: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_1130: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_1131: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_1132: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_1133: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1134: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1135: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1136: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1137: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1138: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1139: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1140: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1141: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1142: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1143: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1144: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1145: MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1146: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1147: 
CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1148: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1149: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1150: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD `tmp, `v +Xx_1151: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_1152: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 MOVAPD `tmp2, `v +Xx_1153: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_1154: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1155: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1156: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1157: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1158: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1159: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1160: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1161: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1162: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1163: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1164: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1165: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1166: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1167: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1168: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1169: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1170: JNZ _label_10 # range_check:31 JNZ _label_10 +Xx_1171: UCOMISD XMM7, XMM4 # gcd_base_continued_fraction:260 UCOMISD `new_ab, `ab_threshold +Xx_1172: JBE _label_10 # gcd_base_continued_fraction:261 JBE _label_10 +Xx_1173: JMP _label_11 # gcd_base_continued_fraction:264 JMP _label_11 +Xx_1174: _label_10: # gcd_base_continued_fraction:266 _label_10: +Xx_1175: #gcd_base no table # gcd_base_continued_fraction:267 
#gcd_base no table +Xx_1176: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:272 MOVAPD `tmp, `ab +Xx_1177: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:273 CMPLEPD `tmp, `ab_threshold +Xx_1178: PTEST XMM9, XMM9 # gcd_base_continued_fraction:274 PTEST `tmp, `tmp +Xx_1179: JNZ _label_4 # gcd_base_continued_fraction:275 JNZ _label_4 +Xx_1180: MOVQ XMM13, R10 # gcd_base_continued_fraction:278 MOVQ `q, `q_scalar_3 +Xx_1181: ROUNDSD XMM13, XMM13, 1 # gcd_base_continued_fraction:282 ROUNDSD `q, `q, 1 +Xx_1182: MOVAPD XMM5, [RIP+_label_7] # gcd_base_continued_fraction:288 MOVAPD `m_0, [RIP+_label_7] +Xx_1183: MOVAPD XMM6, XMM5 # gcd_base_continued_fraction:289 MOVAPD `m_1, `m_0 +Xx_1184: SUBSD XMM6, XMM13 # gcd_base_continued_fraction:290 SUBSD `m_1, `q +Xx_1185: SHUFPD XMM6, XMM6, 1 # gcd_base_continued_fraction:291 SHUFPD `m_1, `m_1, 1 +Xx_1186: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1187: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1188: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1189: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1190: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1191: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1192: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1193: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1194: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1195: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1196: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1197: ADDPD XMM7, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1198: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1199: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1200: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1201: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1202: MOVAPD XMM8, XMM7 
# gcd_base_continued_fraction:226 MOVAPD `new_ab_1, `new_ab +Xx_1203: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_1204: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_1205: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_1206: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_1207: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_1208: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_1209: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_1210: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1211: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1212: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1213: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1214: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1215: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1216: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1217: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1218: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1219: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1220: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1221: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1222: MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1223: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1224: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1225: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1226: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1227: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD `tmp, `v +Xx_1228: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_1229: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 
MOVAPD `tmp2, `v +Xx_1230: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_1231: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1232: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1233: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1234: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1235: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1236: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1237: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1238: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1239: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1240: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1241: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1242: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1243: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1244: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1245: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1246: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1247: JNZ _label_4 # range_check:31 JNZ _label_4 +Xx_1248: _label_11: # gcd_base_continued_fraction:296 _label_11: +Xx_1249: #gcd_base end no table # gcd_base_continued_fraction:297 #gcd_base end no table +Xx_1250: MOVAPD XMM5, XMM11 # gcd_base_continued_fraction:322 MOVAPD `m_0, `new_u +Xx_1251: SHUFPD XMM5, XMM12, 0 # gcd_base_continued_fraction:323 SHUFPD `m_0, `new_v, 0 +Xx_1252: MOVAPD XMM6, XMM11 # gcd_base_continued_fraction:326 MOVAPD `m_1, `new_u +Xx_1253: SHUFPD XMM6, XMM12, 3 # gcd_base_continued_fraction:327 SHUFPD `m_1, `new_v, 3 +Xx_1254: MOVAPD XMM9, XMM7 # gcd_base_continued_fraction:330 MOVAPD `tmp, `new_ab +Xx_1255: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:331 SHUFPD `tmp, `tmp, 0 +Xx_1256: SUBPD XMM9, XMM8 # gcd_base_continued_fraction:334 SUBPD `tmp, `new_ab_1 +Xx_1257: ADDPD XMM9, 
XMM5 # gcd_base_continued_fraction:337 ADDPD `tmp, `m_0 +Xx_1258: CMPLTPD XMM9, XMM6 # gcd_base_continued_fraction:340 CMPLTPD `tmp, `m_1 +Xx_1259: .text # gcd_base_continued_fraction:109 .text +Xx_1260: .balign 16 # gcd_base_continued_fraction:113 .balign 16 +Xx_1261: _label_12: # gcd_base_continued_fraction:114 _label_12: +Xx_1262: .quad 0x8000000000000000 # gcd_base_continued_fraction:115 .quad 0x8000000000000000 +Xx_1263: .quad 0x8000000000000000 # gcd_base_continued_fraction:116 .quad 0x8000000000000000 +Xx_1264: .text # gcd_base_continued_fraction:117 .text +Xx_1265: XORPD XMM6, [RIP+_label_12] # gcd_base_continued_fraction:343 XORPD `m_1, [RIP+_label_12] +Xx_1266: CMPLTPD XMM8, XMM6 # gcd_base_continued_fraction:346 CMPLTPD `new_ab_1, `m_1 +Xx_1267: ORPD XMM9, XMM8 # gcd_base_continued_fraction:350 ORPD `tmp, `new_ab_1 +Xx_1268: ANDPD XMM9, XMM3 # gcd_base_continued_fraction:351 ANDPD `tmp, `is_lehmer +Xx_1269: PTEST XMM9, XMM9 # gcd_base_continued_fraction:352 PTEST `tmp, `tmp +Xx_1270: JNZ _label_4 # gcd_base_continued_fraction:353 JNZ _label_4 +Xx_1271: MOVAPD XMM0, XMM7 # gcd_base_continued_fraction:355 MOVAPD `ab, `new_ab +Xx_1272: MOVAPD XMM1, XMM11 # gcd_base_continued_fraction:356 MOVAPD `u, `new_u +Xx_1273: MOVAPD XMM2, XMM12 # gcd_base_continued_fraction:357 MOVAPD `v, `new_v +Xx_1274: DEC R11 # gcd_base_continued_fraction:360 DEC `loop_counter +Xx_1275: JNZ _label_5 # gcd_base_continued_fraction:361 JNZ _label_5 +Xx_1276: #gcd_base loop end # gcd_base_continued_fraction:363 #gcd_base loop end +Xx_1277: _label_4: # gcd_base_continued_fraction:368 _label_4: +Xx_1278: CMP R11, 0x5 # gcd_base_continued_fraction:370 CMP `loop_counter, 0x5 +Xx_1279: JE _label_0 # gcd_base_continued_fraction:371 JE _label_0 +Xx_1280: JMP _label_1 # compile_asm_gcd_base:49 JMP _label_1 +Xx_1281: _label_0: # compile_asm_gcd_base:50 _label_0: +Xx_1282: MOV QWORD PTR [R9], 1 # compile_asm_gcd_base:52 MOV QWORD PTR [`no_progress_addr], 1 +Xx_1283: JMP _label_2 # 
compile_asm_gcd_base:53 JMP _label_2 +Xx_1284: _label_1: # compile_asm_gcd_base:55 _label_1: +Xx_1285: MOV QWORD PTR [R9], 0 # compile_asm_gcd_base:57 MOV QWORD PTR [`no_progress_addr], 0 +Xx_1286: _label_2: # compile_asm_gcd_base:59 _label_2: +Xx_1287: MOVDQU [RDI], XMM0 # compile_asm_gcd_base:61 MOVDQU [`ab_addr], `ab +Xx_1288: MOVDQU [RSI], XMM1 # compile_asm_gcd_base:62 MOVDQU [`u_addr], `u +Xx_1289: MOVDQU [RDX], XMM2 # compile_asm_gcd_base:63 MOVDQU [`v_addr], `v +Xx_1290: MOVDQU [RCX], XMM3 # compile_asm_gcd_base:64 MOVDQU [`is_lehmer_addr], `is_lehmer +Xx_1291: MOVDQU [R8], XMM4 # compile_asm_gcd_base:65 MOVDQU [`ab_threshold_addr], `ab_threshold +Xx_1292: MOV RAX, 0 # ~asm_function:107 MOV RAX, 0 +Xx_1293: _label_13: # ~asm_function:110 _label_13: +Xx_1294: MOV RSP, [RSP] # ~asm_function:112 MOV RSP, [RSP] +Xx_1295: POP R15 # ~asm_function:114 POP R15 +Xx_1296: POP R14 # ~asm_function:114 POP R14 +Xx_1297: POP R13 # ~asm_function:114 POP R13 +Xx_1298: POP R12 # ~asm_function:114 POP R12 +Xx_1299: POP RBX # ~asm_function:114 POP RBX +Xx_1300: POP RBP # ~asm_function:114 POP RBP +Xx_1301: RET # ~asm_function:116 RET +Xx_1302: .global _asm_func_gcd_128 # asm_function:64 .global _asm_func_gcd_128 +Xx_1303: _asm_func_gcd_128: # asm_function:65 _asm_func_gcd_128: +Xx_1304: PUSH RBP # asm_function:84 PUSH RBP +Xx_1305: PUSH RBX # asm_function:84 PUSH RBX +Xx_1306: PUSH R12 # asm_function:84 PUSH R12 +Xx_1307: PUSH R13 # asm_function:84 PUSH R13 +Xx_1308: PUSH R14 # asm_function:84 PUSH R14 +Xx_1309: PUSH R15 # asm_function:84 PUSH R15 +Xx_1310: MOV RAX, RSP # asm_function:96 MOV RAX, RSP +Xx_1311: AND RSP, -64 # asm_function:97 AND RSP, -64 +Xx_1312: SUB RSP, 64 # asm_function:98 SUB RSP, 64 +Xx_1313: MOV [RSP], RAX # asm_function:99 MOV [RSP], RAX +Xx_1314: MOV [RSP+-0x400], RDI # compile_asm_gcd_128:112 MOV `spill_data_addr, `data_addr +Xx_1315: MOV RBX, [RDI+0x0] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x0] +Xx_1316: MOV [RSP+-0x3f8], RBX # 
compile_asm_gcd_128:116 MOV [RSP+-0x3f8], `tmp +Xx_1317: MOV RBX, [RDI+0x8] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x8] +Xx_1318: MOV [RSP+-0x3f0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3f0], `tmp +Xx_1319: MOV RBX, [RDI+0x10] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x10] +Xx_1320: MOV [RSP+-0x3e8], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3e8], `tmp +Xx_1321: MOV RBX, [RDI+0x18] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x18] +Xx_1322: MOV [RSP+-0x3e0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3e0], `tmp +Xx_1323: MOV RBX, [RDI+0x20] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x20] +Xx_1324: MOV [RSP+-0x3d8], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3d8], `tmp +Xx_1325: MOV RBX, [RDI+0x28] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x28] +Xx_1326: MOV [RSP+-0x3d0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3d0], `tmp +Xx_1327: MOV RBX, [RDI+0x30] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x30] +Xx_1328: MOV [RSP+-0x3c8], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3c8], `tmp +Xx_1329: MOV RBX, [RDI+0x38] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x38] +Xx_1330: MOV [RSP+-0x3c0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3c0], `tmp +Xx_1331: MOV RBX, [RDI+0x40] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x40] +Xx_1332: MOV [RSP+-0x3b8], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3b8], `tmp +Xx_1333: MOV RBX, [RDI+0x48] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x48] +Xx_1334: MOV [RSP+-0x3b0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3b0], `tmp +Xx_1335: MOV RBX, [RDI+0x50] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x50] +Xx_1336: MOV [RSP+-0x3a8], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3a8], `tmp +Xx_1337: MOV RBX, [RDI+0x58] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x58] +Xx_1338: MOV [RSP+-0x3a0], RBX # compile_asm_gcd_128:116 MOV [RSP+-0x3a0], `tmp +Xx_1339: MOV RBX, [RDI+0x60] # compile_asm_gcd_128:115 MOV `tmp, [`data_addr+0x60] +Xx_1340: MOV [RSP+-0x398], RBX # 
compile_asm_gcd_128:116 MOV [RSP+-0x398], `tmp +Xx_1341: MOV QWORD PTR [RSP+-0x3d8], 1 # gcd_128:107 MOV QWORD PTR `spill_u_0, 1 +Xx_1342: MOV QWORD PTR [RSP+-0x3d0], 0 # gcd_128:108 MOV QWORD PTR `spill_u_1, 0 +Xx_1343: MOV QWORD PTR [RSP+-0x3c8], 0 # gcd_128:109 MOV QWORD PTR `spill_v_0, 0 +Xx_1344: MOV QWORD PTR [RSP+-0x3c0], 1 # gcd_128:110 MOV QWORD PTR `spill_v_1, 1 +Xx_1345: MOV QWORD PTR [RSP+-0x3b8], 0 # gcd_128:111 MOV QWORD PTR `spill_parity, 0 +Xx_1346: MOV QWORD PTR [RSP+-0x390], 0x3 # gcd_128:112 MOV QWORD PTR `spill_iter, 0x3 +Xx_1347: JMP _label_21 # gcd_128:120 JMP _label_21 +Xx_1348: _label_18: # gcd_128:122 _label_18: +Xx_1349: MOV R12, [RIP+_label_3] # gcd_base_continued_fraction:141 MOV `c_table_delta_minus_1, [RIP+_label_3] +Xx_1350: MOV R11, 0x5 # gcd_base_continued_fraction:146 MOV `loop_counter, 0x5 +Xx_1351: MOVAPD XMM1, [RIP+_label_6] # gcd_base_continued_fraction:148 MOVAPD `u, [RIP+_label_6] +Xx_1352: MOVAPD XMM2, [RIP+_label_7] # gcd_base_continued_fraction:149 MOVAPD `v, [RIP+_label_7] +Xx_1353: MOVAPD XMM14, [RIP+_label_8] # gcd_base_continued_fraction:150 MOVAPD `range_check_range, [RIP+_label_8] +Xx_1354: MOVAPD XMM15, [RIP+_label_9] # gcd_base_continued_fraction:151 MOVAPD `double_abs_mask, [RIP+_label_9] +Xx_1355: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:154 MOVAPD `tmp, `ab +Xx_1356: SHUFPD XMM9, XMM9, 3 # gcd_base_continued_fraction:155 SHUFPD `tmp, `tmp, 3 +Xx_1357: MOVAPD XMM13, XMM0 # gcd_base_continued_fraction:156 MOVAPD `q, `ab +Xx_1358: DIVSD XMM13, XMM9 # gcd_base_continued_fraction:157 DIVSD `q, `tmp +Xx_1359: _label_23: # gcd_base_continued_fraction:160 _label_23: +Xx_1360: #gcd_base loop start # gcd_base_continued_fraction:166 #gcd_base loop start +Xx_1361: MOVQ R8, XMM13 # gcd_base_continued_fraction:169 MOVQ `q_scalar, `q +Xx_1362: MOV R9, R8 # gcd_base_continued_fraction:170 MOV `q_scalar_2, `q_scalar +Xx_1363: MOV R10, R8 # gcd_base_continued_fraction:171 MOV `q_scalar_3, `q_scalar +Xx_1364: SHR R8, 0x28 # 
gcd_base_continued_fraction:175 SHR `q_scalar, 0x28 +Xx_1365: AND R8, -32 # gcd_base_continued_fraction:176 AND `q_scalar, -32 +Xx_1366: SUB R8, 0x3ff000 # gcd_base_continued_fraction:181 SUB `q_scalar, 0x3ff000 +Xx_1367: JB _label_24 # gcd_base_continued_fraction:182 JB _label_24 +Xx_1368: CMP R8, 0x8000 # gcd_base_continued_fraction:183 CMP `q_scalar, 0x8000 +Xx_1369: JAE _label_24 # gcd_base_continued_fraction:184 JAE _label_24 +Xx_1370: LEA RSI,[RIP+gcd_base_table] # gcd_base_continued_fraction:189 LEA RSI,[RIP+gcd_base_table] +Xx_1371: MOVAPD XMM5, [R8+RSI] # gcd_base_continued_fraction:190 MOVAPD `m_0, [`q_scalar+RSI] +Xx_1372: MOVAPD XMM6, [16+R8+RSI] # gcd_base_continued_fraction:191 MOVAPD `m_1, [16+`q_scalar+RSI] +Xx_1373: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:199 MOVAPD `tmp, `ab +Xx_1374: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:200 CMPLEPD `tmp, `ab_threshold +Xx_1375: PTEST XMM9, XMM9 # gcd_base_continued_fraction:201 PTEST `tmp, `tmp +Xx_1376: JNZ _label_22 # gcd_base_continued_fraction:202 JNZ _label_22 +Xx_1377: AND R9, R12 # gcd_base_continued_fraction:205 AND `q_scalar_2, `c_table_delta_minus_1 +Xx_1378: JZ _label_24 # gcd_base_continued_fraction:206 JZ _label_24 +Xx_1379: CMP R9, R12 # gcd_base_continued_fraction:207 CMP `q_scalar_2, `c_table_delta_minus_1 +Xx_1380: JE _label_24 # gcd_base_continued_fraction:208 JE _label_24 +Xx_1381: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1382: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1383: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1384: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1385: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1386: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1387: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1388: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range 
+Xx_1389: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1390: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1391: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1392: ADDPD XMM7, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1393: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1394: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1395: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1396: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1397: MOVAPD XMM8, XMM7 # gcd_base_continued_fraction:226 MOVAPD `new_ab_1, `new_ab +Xx_1398: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_1399: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_1400: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_1401: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_1402: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_1403: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_1404: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_1405: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1406: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1407: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1408: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1409: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1410: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1411: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1412: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1413: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1414: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1415: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1416: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1417: 
MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1418: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1419: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1420: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1421: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1422: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD `tmp, `v +Xx_1423: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_1424: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 MOVAPD `tmp2, `v +Xx_1425: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_1426: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1427: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1428: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1429: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1430: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1431: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1432: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1433: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1434: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1435: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1436: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1437: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1438: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1439: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1440: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1441: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1442: JNZ _label_24 # range_check:31 JNZ _label_24 +Xx_1443: UCOMISD XMM7, XMM4 # gcd_base_continued_fraction:260 UCOMISD `new_ab, `ab_threshold +Xx_1444: JBE _label_24 # gcd_base_continued_fraction:261 JBE _label_24 +Xx_1445: JMP _label_25 # gcd_base_continued_fraction:264 
JMP _label_25 +Xx_1446: _label_24: # gcd_base_continued_fraction:266 _label_24: +Xx_1447: #gcd_base no table # gcd_base_continued_fraction:267 #gcd_base no table +Xx_1448: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:272 MOVAPD `tmp, `ab +Xx_1449: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:273 CMPLEPD `tmp, `ab_threshold +Xx_1450: PTEST XMM9, XMM9 # gcd_base_continued_fraction:274 PTEST `tmp, `tmp +Xx_1451: JNZ _label_22 # gcd_base_continued_fraction:275 JNZ _label_22 +Xx_1452: MOVQ XMM13, R10 # gcd_base_continued_fraction:278 MOVQ `q, `q_scalar_3 +Xx_1453: ROUNDSD XMM13, XMM13, 1 # gcd_base_continued_fraction:282 ROUNDSD `q, `q, 1 +Xx_1454: MOVAPD XMM5, [RIP+_label_7] # gcd_base_continued_fraction:288 MOVAPD `m_0, [RIP+_label_7] +Xx_1455: MOVAPD XMM6, XMM5 # gcd_base_continued_fraction:289 MOVAPD `m_1, `m_0 +Xx_1456: SUBSD XMM6, XMM13 # gcd_base_continued_fraction:290 SUBSD `m_1, `q +Xx_1457: SHUFPD XMM6, XMM6, 1 # gcd_base_continued_fraction:291 SHUFPD `m_1, `m_1, 1 +Xx_1458: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1459: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1460: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1461: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1462: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1463: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1464: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1465: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1466: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1467: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1468: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1469: ADDPD XMM7, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1470: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1471: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, 
`range +Xx_1472: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1473: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1474: MOVAPD XMM8, XMM7 # gcd_base_continued_fraction:226 MOVAPD `new_ab_1, `new_ab +Xx_1475: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_1476: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_1477: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_1478: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_1479: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_1480: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_1481: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_1482: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1483: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1484: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1485: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1486: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1487: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1488: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1489: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1490: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1491: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1492: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1493: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1494: MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1495: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1496: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1497: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1498: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1499: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD 
`tmp, `v +Xx_1500: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_1501: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 MOVAPD `tmp2, `v +Xx_1502: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_1503: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1504: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1505: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1506: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1507: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1508: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1509: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1510: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1511: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1512: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1513: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1514: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1515: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1516: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1517: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1518: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1519: JNZ _label_22 # range_check:31 JNZ _label_22 +Xx_1520: _label_25: # gcd_base_continued_fraction:296 _label_25: +Xx_1521: #gcd_base end no table # gcd_base_continued_fraction:297 #gcd_base end no table +Xx_1522: MOVAPD XMM5, XMM11 # gcd_base_continued_fraction:322 MOVAPD `m_0, `new_u +Xx_1523: SHUFPD XMM5, XMM12, 0 # gcd_base_continued_fraction:323 SHUFPD `m_0, `new_v, 0 +Xx_1524: MOVAPD XMM6, XMM11 # gcd_base_continued_fraction:326 MOVAPD `m_1, `new_u +Xx_1525: SHUFPD XMM6, XMM12, 3 # gcd_base_continued_fraction:327 SHUFPD `m_1, `new_v, 3 +Xx_1526: MOVAPD XMM9, XMM7 # gcd_base_continued_fraction:330 MOVAPD `tmp, `new_ab +Xx_1527: SHUFPD XMM9, XMM9, 
0 # gcd_base_continued_fraction:331 SHUFPD `tmp, `tmp, 0 +Xx_1528: SUBPD XMM9, XMM8 # gcd_base_continued_fraction:334 SUBPD `tmp, `new_ab_1 +Xx_1529: ADDPD XMM9, XMM5 # gcd_base_continued_fraction:337 ADDPD `tmp, `m_0 +Xx_1530: CMPLTPD XMM9, XMM6 # gcd_base_continued_fraction:340 CMPLTPD `tmp, `m_1 +Xx_1531: XORPD XMM6, [RIP+_label_12] # gcd_base_continued_fraction:343 XORPD `m_1, [RIP+_label_12] +Xx_1532: CMPLTPD XMM8, XMM6 # gcd_base_continued_fraction:346 CMPLTPD `new_ab_1, `m_1 +Xx_1533: ORPD XMM9, XMM8 # gcd_base_continued_fraction:350 ORPD `tmp, `new_ab_1 +Xx_1534: ANDPD XMM9, XMM3 # gcd_base_continued_fraction:351 ANDPD `tmp, `is_lehmer +Xx_1535: PTEST XMM9, XMM9 # gcd_base_continued_fraction:352 PTEST `tmp, `tmp +Xx_1536: JNZ _label_22 # gcd_base_continued_fraction:353 JNZ _label_22 +Xx_1537: MOVAPD XMM0, XMM7 # gcd_base_continued_fraction:355 MOVAPD `ab, `new_ab +Xx_1538: MOVAPD XMM1, XMM11 # gcd_base_continued_fraction:356 MOVAPD `u, `new_u +Xx_1539: MOVAPD XMM2, XMM12 # gcd_base_continued_fraction:357 MOVAPD `v, `new_v +Xx_1540: DEC R11 # gcd_base_continued_fraction:360 DEC `loop_counter +Xx_1541: JNZ _label_23 # gcd_base_continued_fraction:361 JNZ _label_23 +Xx_1542: #gcd_base loop end # gcd_base_continued_fraction:363 #gcd_base loop end +Xx_1543: _label_22: # gcd_base_continued_fraction:368 _label_22: +Xx_1544: CMP R11, 0x5 # gcd_base_continued_fraction:370 CMP `loop_counter, 0x5 +Xx_1545: JE _label_19 # gcd_base_continued_fraction:371 JE _label_19 +Xx_1546: MOVAPD XMM8, [RIP+_label_9] # gcd_128:175 MOVAPD `double_abs_mask, [RIP+_label_9] +Xx_1547: MOVAPD XMM5, XMM1 # gcd_128:203 MOVAPD `tmp_a, `vector_u +Xx_1548: PAND XMM5, XMM8 # gcd_128:189 PAND `tmp_a, `double_abs_mask +Xx_1549: CVTTSD2SI R8, XMM5 # gcd_128:197 CVTTSD2SI `m_0_0, `tmp_a +Xx_1550: SHUFPD XMM5, XMM5, 3 # gcd_128:206 SHUFPD `tmp_a, `tmp_a, 3 +Xx_1551: CVTTSD2SI R10, XMM5 # gcd_128:197 CVTTSD2SI `m_1_0, `tmp_a +Xx_1552: MOVAPD XMM5, XMM2 # gcd_128:211 MOVAPD `tmp_a, `vector_v +Xx_1553: 
PAND XMM5, XMM8 # gcd_128:189 PAND `tmp_a, `double_abs_mask +Xx_1554: CVTTSD2SI R9, XMM5 # gcd_128:197 CVTTSD2SI `m_0_1, `tmp_a +Xx_1555: SHUFPD XMM5, XMM5, 3 # gcd_128:214 SHUFPD `tmp_a, `tmp_a, 3 +Xx_1556: CVTTSD2SI R11, XMM5 # gcd_128:197 CVTTSD2SI `m_1_1, `tmp_a +Xx_1557: MOV R12, [RSP+-0x3d8] # gcd_128:217 MOV `tmp_0, `spill_u_0 +Xx_1558: MOV R13, [RSP+-0x3d0] # gcd_128:218 MOV `tmp_1, `spill_u_1 +Xx_1559: MOV RAX, R8 # dot_product_exact:56 MOV RAX, `a_0 +Xx_1560: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_1561: JC _label_19 # dot_product_exact:58 JC _label_19 +Xx_1562: MOV RBX, RAX # dot_product_exact:59 MOV `out, RAX +Xx_1563: MOV RAX, R9 # dot_product_exact:62 MOV RAX, `a_1 +Xx_1564: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_1565: JC _label_19 # dot_product_exact:64 JC _label_19 +Xx_1566: ADD RBX, RAX # dot_product_exact:67 ADD `out, RAX +Xx_1567: JC _label_19 # dot_product_exact:68 JC _label_19 +Xx_1568: MOV RAX, R10 # dot_product_exact:56 MOV RAX, `a_0 +Xx_1569: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_1570: JC _label_19 # dot_product_exact:58 JC _label_19 +Xx_1571: MOV RBP, RAX # dot_product_exact:59 MOV `out, RAX +Xx_1572: MOV RAX, R11 # dot_product_exact:62 MOV RAX, `a_1 +Xx_1573: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_1574: JC _label_19 # dot_product_exact:64 JC _label_19 +Xx_1575: ADD RBP, RAX # dot_product_exact:67 ADD `out, RAX +Xx_1576: JC _label_19 # dot_product_exact:68 JC _label_19 +Xx_1577: MOV R12, [RSP+-0x3c8] # gcd_128:222 MOV `tmp_0, `spill_v_0 +Xx_1578: MOV R13, [RSP+-0x3c0] # gcd_128:223 MOV `tmp_1, `spill_v_1 +Xx_1579: MOV RAX, R8 # dot_product_exact:56 MOV RAX, `a_0 +Xx_1580: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_1581: JC _label_19 # dot_product_exact:58 JC _label_19 +Xx_1582: MOV RSI, RAX # dot_product_exact:59 MOV `out, RAX +Xx_1583: MOV RAX, R9 # dot_product_exact:62 MOV RAX, `a_1 +Xx_1584: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_1585: JC _label_19 # dot_product_exact:64 JC _label_19 +Xx_1586: ADD RSI, RAX # 
dot_product_exact:67 ADD `out, RAX +Xx_1587: JC _label_19 # dot_product_exact:68 JC _label_19 +Xx_1588: MOV RAX, R10 # dot_product_exact:56 MOV RAX, `a_0 +Xx_1589: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_1590: JC _label_19 # dot_product_exact:58 JC _label_19 +Xx_1591: MOV RDI, RAX # dot_product_exact:59 MOV `out, RAX +Xx_1592: MOV RAX, R11 # dot_product_exact:62 MOV RAX, `a_1 +Xx_1593: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_1594: JC _label_19 # dot_product_exact:64 JC _label_19 +Xx_1595: ADD RDI, RAX # dot_product_exact:67 ADD `out, RAX +Xx_1596: JC _label_19 # dot_product_exact:68 JC _label_19 +Xx_1597: MOV R13, [RSP+-0x3f8] # gcd_128:249 MOV `ab_start_0_0, `spill_ab_start_0_0 +Xx_1598: MOV R14, [RSP+-0x3f0] # gcd_128:250 MOV `ab_start_0_1, `spill_ab_start_0_1 +Xx_1599: MOV R15, [RSP+-0x3e8] # gcd_128:251 MOV `ab_start_1_0, `spill_ab_start_1_0 +Xx_1600: MOV RCX, [RSP+-0x3e0] # gcd_128:252 MOV `ab_start_1_1, `spill_ab_start_1_1 +Xx_1601: MOVAPD XMM5, XMM2 # gcd_128:256 MOVAPD `tmp_a, `vector_v +Xx_1602: SHUFPD XMM5, XMM5, 3 # gcd_128:257 SHUFPD `tmp_a, `tmp_a, 3 +Xx_1603: MOVQ RAX, XMM5 # gcd_128:258 MOVQ RAX, `tmp_a +Xx_1604: SHR RAX, 63 # gcd_128:259 SHR RAX, 63 +Xx_1605: MOV R12, [RSP+-0x3b8] # gcd_128:262 MOV `new_parity, `spill_parity +Xx_1606: XOR R12, RAX # gcd_128:263 XOR `new_parity, RAX +Xx_1607: MOV RAX, R13 # gcd_128:268 MOV RAX, `ab_start_0_0 +Xx_1608: MUL RBX # gcd_128:269 MUL `new_u_0 +Xx_1609: MOV R8, RAX # gcd_128:272 MOV `new_ab_0_0, RAX +Xx_1610: MOV R9, RDX # gcd_128:273 MOV `new_ab_0_1, RDX +Xx_1611: MOV RAX, R14 # gcd_128:276 MOV RAX, `ab_start_0_1 +Xx_1612: MUL RBX # gcd_128:277 MUL `new_u_0 +Xx_1613: ADD R9, RAX # gcd_128:280 ADD `new_ab_0_1, RAX +Xx_1614: MOV RAX, R15 # gcd_128:283 MOV RAX, `ab_start_1_0 +Xx_1615: MUL RSI # gcd_128:284 MUL `new_v_0 +Xx_1616: SUB R8, RAX # gcd_128:287 SUB `new_ab_0_0, RAX +Xx_1617: SBB R9, RDX # gcd_128:288 SBB `new_ab_0_1, RDX +Xx_1618: MOV RAX, RCX # gcd_128:291 MOV RAX, `ab_start_1_1 +Xx_1619: 
MUL RSI # gcd_128:292 MUL `new_v_0 +Xx_1620: SUB R9, RAX # gcd_128:295 SUB `new_ab_0_1, RAX +Xx_1621: MOV RAX, R15 # gcd_128:268 MOV RAX, `ab_start_1_0 +Xx_1622: MUL RDI # gcd_128:269 MUL `new_v_1 +Xx_1623: MOV R10, RAX # gcd_128:272 MOV `new_ab_1_0, RAX +Xx_1624: MOV R11, RDX # gcd_128:273 MOV `new_ab_1_1, RDX +Xx_1625: MOV RAX, RCX # gcd_128:276 MOV RAX, `ab_start_1_1 +Xx_1626: MUL RDI # gcd_128:277 MUL `new_v_1 +Xx_1627: ADD R11, RAX # gcd_128:280 ADD `new_ab_1_1, RAX +Xx_1628: MOV RAX, R13 # gcd_128:283 MOV RAX, `ab_start_0_0 +Xx_1629: MUL RBP # gcd_128:284 MUL `new_u_1 +Xx_1630: SUB R10, RAX # gcd_128:287 SUB `new_ab_1_0, RAX +Xx_1631: SBB R11, RDX # gcd_128:288 SBB `new_ab_1_1, RDX +Xx_1632: MOV RAX, R14 # gcd_128:291 MOV RAX, `ab_start_0_1 +Xx_1633: MUL RBP # gcd_128:292 MUL `new_u_1 +Xx_1634: SUB R11, RAX # gcd_128:295 SUB `new_ab_1_1, RAX +Xx_1635: MOV RAX, -1 # gcd_128:326 MOV RAX, -1 +Xx_1636: ADD RAX, R12 # gcd_128:327 ADD RAX, `new_parity +Xx_1637: NOT RAX # gcd_128:328 NOT RAX +Xx_1638: XOR R8, RAX # gcd_128:333 XOR `new_ab_0_0, RAX +Xx_1639: XOR R9, RAX # gcd_128:334 XOR `new_ab_0_1, RAX +Xx_1640: ADD R8, R12 # gcd_128:337 ADD `new_ab_0_0, `new_parity +Xx_1641: ADC R9, 0 # gcd_128:338 ADC `new_ab_0_1, 0 +Xx_1642: XOR R10, RAX # gcd_128:333 XOR `new_ab_1_0, RAX +Xx_1643: XOR R11, RAX # gcd_128:334 XOR `new_ab_1_1, RAX +Xx_1644: ADD R10, R12 # gcd_128:337 ADD `new_ab_1_0, `new_parity +Xx_1645: ADC R11, 0 # gcd_128:338 ADC `new_ab_1_1, 0 +Xx_1646: MOV RAX, [RSP+-0x3b0] # gcd_128:366 MOV `is_lehmer, `spill_is_lehmer +Xx_1647: MOV R15, R8 # gcd_128:369 MOV `ab_delta_0, `new_ab_0_0 +Xx_1648: MOV RCX, R9 # gcd_128:370 MOV `ab_delta_1, `new_ab_0_1 +Xx_1649: SUB R15, R10 # gcd_128:371 SUB `ab_delta_0, `new_ab_1_0 +Xx_1650: SBB RCX, R11 # gcd_128:372 SBB `ab_delta_1, `new_ab_1_1 +Xx_1651: CMP R12, 0 # gcd_128:391 CMP `new_parity, 0 +Xx_1652: MOV R13, RBP # gcd_128:392 MOV `ab_threshold_0, `new_u_1 +Xx_1653: CMOVE R13, RDI # gcd_128:393 CMOVE `ab_threshold_0, 
`new_v_1 +Xx_1654: MOV R14, RBX # gcd_128:397 MOV `ab_threshold_1, `new_u_0 +Xx_1655: CMOVE R14, RSI # gcd_128:398 CMOVE `ab_threshold_1, `new_v_0 +Xx_1656: MOV RDX, RDI # gcd_128:401 MOV `b_new_min, `new_v_1 +Xx_1657: CMOVE RDX, RBP # gcd_128:402 CMOVE `b_new_min, `new_u_1 +Xx_1658: CMP RAX, 0 # gcd_128:405 CMP `is_lehmer, 0 +Xx_1659: CMOVE R13, RAX # gcd_128:406 CMOVE `ab_threshold_0, `is_lehmer +Xx_1660: CMOVE R14, RAX # gcd_128:407 CMOVE `ab_threshold_1, `is_lehmer +Xx_1661: CMOVE RDX, RAX # gcd_128:410 CMOVE `b_new_min, `is_lehmer +Xx_1662: ADD R13, R14 # gcd_128:413 ADD `ab_threshold_0, `ab_threshold_1 +Xx_1663: MOV R14, 0 # gcd_128:414 MOV `ab_threshold_1, 0 +Xx_1664: ADC R14, 0 # gcd_128:415 ADC `ab_threshold_1, 0 +Xx_1665: SUB R15, R13 # gcd_128:420 SUB `ab_delta_0, `ab_threshold_0 +Xx_1666: SBB RCX, R14 # gcd_128:421 SBB `ab_delta_1, `ab_threshold_1 +Xx_1667: JC _label_19 # gcd_128:422 JC _label_19 +Xx_1668: CMP R10, RDX # gcd_128:426 CMP `new_ab_1_0, `b_new_min +Xx_1669: MOV RDX, R11 # gcd_128:427 MOV `b_new_min, `new_ab_1_1 +Xx_1670: SBB RDX, 0 # gcd_128:428 SBB `b_new_min, 0 +Xx_1671: JC _label_19 # gcd_128:429 JC _label_19 +Xx_1672: MOV R13, [RSP+-0x3a8] # gcd_128:434 MOV `ab_threshold_0, `spill_ab_threshold_0 +Xx_1673: MOV R14, [RSP+-0x3a0] # gcd_128:435 MOV `ab_threshold_1, `spill_ab_threshold_1 +Xx_1674: MOV R15, R13 # gcd_128:438 MOV `ab_delta_0, `ab_threshold_0 +Xx_1675: MOV RCX, R14 # gcd_128:439 MOV `ab_delta_1, `ab_threshold_1 +Xx_1676: SUB R15, R8 # gcd_128:440 SUB `ab_delta_0, `new_ab_0_0 +Xx_1677: SBB RCX, R9 # gcd_128:441 SBB `ab_delta_1, `new_ab_0_1 +Xx_1678: JNC _label_19 # gcd_128:442 JNC _label_19 +Xx_1679: MOV [RSP+-0x3d8], RBX # gcd_128:445 MOV `spill_u_0, `new_u_0 +Xx_1680: MOV [RSP+-0x3d0], RBP # gcd_128:446 MOV `spill_u_1, `new_u_1 +Xx_1681: MOV [RSP+-0x3c8], RSI # gcd_128:449 MOV `spill_v_0, `new_v_0 +Xx_1682: MOV [RSP+-0x3c0], RDI # gcd_128:450 MOV `spill_v_1, `new_v_1 +Xx_1683: MOV [RSP+-0x3b8], R12 # gcd_128:453 MOV 
`spill_parity, `new_parity +Xx_1684: MOV R15, [RSP+-0x390] # gcd_128:459 MOV `ab_delta_0, `spill_iter +Xx_1685: DEC R15 # gcd_128:460 DEC `ab_delta_0 +Xx_1686: MOV [RSP+-0x390], R15 # gcd_128:461 MOV `spill_iter, `ab_delta_0 +Xx_1687: JZ _label_20 # gcd_128:462 JZ _label_20 +Xx_1688: _label_17: # gcd_128:465 _label_17: +Xx_1689: XOR RCX, RCX # gcd_128:492 XOR `tmp_3, `tmp_3 +Xx_1690: MOV RAX, R10 # gcd_128:495 MOV `tmp_0, `ab_1_0 +Xx_1691: MOV RDX, R11 # gcd_128:496 MOV `tmp_1, `ab_1_1 +Xx_1692: SUB RAX, R13 # gcd_128:497 SUB `tmp_0, `ab_threshold_0 +Xx_1693: SBB RDX, R14 # gcd_128:498 SBB `tmp_1, `ab_threshold_1 +Xx_1694: JC _label_19 # gcd_128:501 JC _label_19 +Xx_1695: MOV R15, RAX # gcd_128:504 MOV `tmp_2, `tmp_0 +Xx_1696: OR R15, RDX # gcd_128:505 OR `tmp_2, `tmp_1 +Xx_1697: JZ _label_19 # gcd_128:506 JZ _label_19 +Xx_1698: MOV RAX, R9 # gcd_128:511 MOV `tmp_0, `ab_0_1 +Xx_1699: MOV RDX, 64 # gcd_128:512 MOV `tmp_1, 64 +Xx_1700: CMP R9, 0 # gcd_128:513 CMP `ab_0_1, 0 +Xx_1701: JNE _label_26 # gcd_128:516 JNE _label_26 +Xx_1702: MOV RAX, R8 # gcd_128:517 MOV `tmp_0, `ab_0_0 +Xx_1703: _label_26: # gcd_128:518 _label_26: +Xx_1704: JNE _label_27 # gcd_128:521 JNE _label_27 +Xx_1705: MOV RDX, RCX # gcd_128:522 MOV `tmp_1, `tmp_3 +Xx_1706: _label_27: # gcd_128:523 _label_27: +Xx_1707: BSR RAX, RAX # gcd_128:530 BSR `tmp_0, `tmp_0 +Xx_1708: ADD RDX, RAX # gcd_128:533 ADD `tmp_1, `tmp_0 +Xx_1709: INC RDX # gcd_128:534 INC `tmp_1 +Xx_1710: XOR RAX, RAX # gcd_128:545 XOR `tmp_0, `tmp_0 +Xx_1711: MOV R15, [RSP+-0x3b0] # gcd_128:546 MOV `tmp_2, `spill_is_lehmer +Xx_1712: CMP R15, 0 # gcd_128:547 CMP `tmp_2, 0 +Xx_1713: MOV RCX, 96 # gcd_128:548 MOV `tmp_3, 96 +Xx_1714: CMOVNE RAX, RCX # gcd_128:549 CMOVNE `tmp_0, `tmp_3 +Xx_1715: XOR RCX, RCX # gcd_128:550 XOR `tmp_3, `tmp_3 +Xx_1716: CMP RDX, RAX # gcd_128:553 CMP `tmp_1, `tmp_0 +Xx_1717: CMOVB RDX, RAX # gcd_128:554 CMOVB `tmp_1, `tmp_0 +Xx_1718: SUB RDX, 0x32 # gcd_128:562 SUB `tmp_1, 0x32 +Xx_1719: CMOVB RDX, RCX # 
gcd_128:566 CMOVB `tmp_1, `tmp_3 +Xx_1720: OR R15, RDX # gcd_128:569 OR `tmp_2, `tmp_1 +Xx_1721: .text # gcd_128:109 .text +Xx_1722: .balign 16 # gcd_128:113 .balign 16 +Xx_1723: _label_28: # gcd_128:114 _label_28: +Xx_1724: .quad 0x0 # gcd_128:115 .quad 0x0 +Xx_1725: .quad 0x0 # gcd_128:116 .quad 0x0 +Xx_1726: .text # gcd_128:117 .text +Xx_1727: LEA RCX, [RIP+_label_28] # gcd_128:572 LEA `tmp_3, [RIP+_label_28] +Xx_1728: .text # gcd_128:109 .text +Xx_1729: .balign 16 # gcd_128:113 .balign 16 +Xx_1730: _label_29: # gcd_128:114 _label_29: +Xx_1731: .quad 0xffffffffffffffff # gcd_128:115 .quad 0xffffffffffffffff +Xx_1732: .quad 0xffffffffffffffff # gcd_128:116 .quad 0xffffffffffffffff +Xx_1733: .text # gcd_128:117 .text +Xx_1734: LEA RAX, [RIP+_label_29] # gcd_128:573 LEA `tmp_0, [RIP+_label_29] +Xx_1735: CMOVZ RAX, RCX # gcd_128:587 CMOVZ `tmp_0, `tmp_3 +Xx_1736: MOVAPD XMM3, [RAX] # gcd_128:588 MOVAPD `vector_is_lehmer, [`tmp_0] +Xx_1737: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_1738: MOV RBP, R10 # shift_right:24 MOV `res, `v_0 +Xx_1739: SHRD RBP, R11, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_1740: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_1741: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_1742: CMOVAE RBP, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_1743: CMOVAE R15, R11 # shift_right:36 CMOVAE `res_2, `v_1 +Xx_1744: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_1745: OR RBP, R15 # shift_right:42 OR `res, `res_2 +Xx_1746: CVTSI2SD XMM0, RBP # gcd_128:604 CVTSI2SD `vector_ab, `new_u_1 +Xx_1747: SHUFPD XMM0, XMM0, 0 # gcd_128:609 SHUFPD `vector_ab, `vector_ab, 0 +Xx_1748: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_1749: MOV RBX, R8 # shift_right:24 MOV `res, `v_0 +Xx_1750: SHRD RBX, R9, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_1751: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_1752: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_1753: CMOVAE RBX, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_1754: CMOVAE R15, R9 # 
shift_right:36 CMOVAE `res_2, `v_1 +Xx_1755: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_1756: OR RBX, R15 # shift_right:42 OR `res, `res_2 +Xx_1757: CVTSI2SD XMM0, RBX # gcd_128:616 CVTSI2SD `vector_ab, `new_u_0 +Xx_1758: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_1759: MOV RSI, R13 # shift_right:24 MOV `res, `v_0 +Xx_1760: SHRD RSI, R14, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_1761: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_1762: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_1763: CMOVAE RSI, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_1764: CMOVAE R15, R14 # shift_right:36 CMOVAE `res_2, `v_1 +Xx_1765: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_1766: OR RSI, R15 # shift_right:42 OR `res, `res_2 +Xx_1767: CVTSI2SD XMM4, RSI # gcd_128:625 CVTSI2SD `vector_ab_threshold, `new_v_0 +Xx_1768: SHUFPD XMM4, XMM4, 0 # gcd_128:626 SHUFPD `vector_ab_threshold, `vector_ab_threshold, 0 +Xx_1769: JMP _label_18 # gcd_128:630 JMP _label_18 +Xx_1770: _label_19: # gcd_128:635 _label_19: +Xx_1771: MOV R15, [RSP+-0x390] # gcd_128:643 MOV `tmp, `spill_iter +Xx_1772: CMP R15, 0x3 # gcd_128:644 CMP `tmp, 0x3 +Xx_1773: JE _label_14 # gcd_128:645 JE _label_14 +Xx_1774: JMP _label_20 # gcd_128:647 JMP _label_20 +Xx_1775: _label_21: # gcd_128:652 _label_21: +Xx_1776: MOV R8, [RSP+-0x3f8] # gcd_128:654 MOV `new_ab_0_0, `spill_ab_start_0_0 +Xx_1777: MOV R9, [RSP+-0x3f0] # gcd_128:655 MOV `new_ab_0_1, `spill_ab_start_0_1 +Xx_1778: MOV R10, [RSP+-0x3e8] # gcd_128:656 MOV `new_ab_1_0, `spill_ab_start_1_0 +Xx_1779: MOV R11, [RSP+-0x3e0] # gcd_128:657 MOV `new_ab_1_1, `spill_ab_start_1_1 +Xx_1780: MOV R13, [RSP+-0x3a8] # gcd_128:658 MOV `ab_threshold_0, `spill_ab_threshold_0 +Xx_1781: MOV R14, [RSP+-0x3a0] # gcd_128:659 MOV `ab_threshold_1, `spill_ab_threshold_1 +Xx_1782: JMP _label_17 # gcd_128:661 JMP _label_17 +Xx_1783: _label_20: # gcd_128:666 _label_20: +Xx_1784: JMP _label_15 # compile_asm_gcd_128:140 JMP _label_15 +Xx_1785: _label_14: # 
compile_asm_gcd_128:141 _label_14: +Xx_1786: MOV RBX, 1 # compile_asm_gcd_128:143 MOV `tmp, 1 +Xx_1787: JMP _label_16 # compile_asm_gcd_128:144 JMP _label_16 +Xx_1788: _label_15: # compile_asm_gcd_128:146 _label_15: +Xx_1789: MOV RBX, 0 # compile_asm_gcd_128:148 MOV `tmp, 0 +Xx_1790: _label_16: # compile_asm_gcd_128:150 _label_16: +Xx_1791: MOV [RSP+-0x398], RBX # compile_asm_gcd_128:152 MOV [RSP+-0x398], `tmp +Xx_1792: MOV RBP, [RSP+-0x400] # compile_asm_gcd_128:154 MOV `data_addr, `spill_data_addr +Xx_1793: MOV RBX, [RSP+-0x3f8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3f8] +Xx_1794: MOV [RBP+0x0], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x0], `tmp +Xx_1795: MOV RBX, [RSP+-0x3f0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3f0] +Xx_1796: MOV [RBP+0x8], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x8], `tmp +Xx_1797: MOV RBX, [RSP+-0x3e8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3e8] +Xx_1798: MOV [RBP+0x10], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x10], `tmp +Xx_1799: MOV RBX, [RSP+-0x3e0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3e0] +Xx_1800: MOV [RBP+0x18], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x18], `tmp +Xx_1801: MOV RBX, [RSP+-0x3d8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3d8] +Xx_1802: MOV [RBP+0x20], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x20], `tmp +Xx_1803: MOV RBX, [RSP+-0x3d0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3d0] +Xx_1804: MOV [RBP+0x28], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x28], `tmp +Xx_1805: MOV RBX, [RSP+-0x3c8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3c8] +Xx_1806: MOV [RBP+0x30], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x30], `tmp +Xx_1807: MOV RBX, [RSP+-0x3c0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3c0] +Xx_1808: MOV [RBP+0x38], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x38], `tmp +Xx_1809: MOV RBX, [RSP+-0x3b8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3b8] +Xx_1810: MOV [RBP+0x40], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x40], `tmp +Xx_1811: MOV 
RBX, [RSP+-0x3b0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3b0] +Xx_1812: MOV [RBP+0x48], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x48], `tmp +Xx_1813: MOV RBX, [RSP+-0x3a8] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3a8] +Xx_1814: MOV [RBP+0x50], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x50], `tmp +Xx_1815: MOV RBX, [RSP+-0x3a0] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x3a0] +Xx_1816: MOV [RBP+0x58], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x58], `tmp +Xx_1817: MOV RBX, [RSP+-0x398] # compile_asm_gcd_128:157 MOV `tmp, [RSP+-0x398] +Xx_1818: MOV [RBP+0x60], RBX # compile_asm_gcd_128:158 MOV [`data_addr+0x60], `tmp +Xx_1819: MOV RAX, 0 # ~asm_function:107 MOV RAX, 0 +Xx_1820: _label_30: # ~asm_function:110 _label_30: +Xx_1821: MOV RSP, [RSP] # ~asm_function:112 MOV RSP, [RSP] +Xx_1822: POP R15 # ~asm_function:114 POP R15 +Xx_1823: POP R14 # ~asm_function:114 POP R14 +Xx_1824: POP R13 # ~asm_function:114 POP R13 +Xx_1825: POP R12 # ~asm_function:114 POP R12 +Xx_1826: POP RBX # ~asm_function:114 POP RBX +Xx_1827: POP RBP # ~asm_function:114 POP RBP +Xx_1828: RET # ~asm_function:116 RET +Xx_1829: .global _asm_func_gcd_unsigned # asm_function:64 .global _asm_func_gcd_unsigned +Xx_1830: _asm_func_gcd_unsigned: # asm_function:65 _asm_func_gcd_unsigned: +Xx_1831: PUSH RBP # asm_function:84 PUSH RBP +Xx_1832: PUSH RBX # asm_function:84 PUSH RBX +Xx_1833: PUSH R12 # asm_function:84 PUSH R12 +Xx_1834: PUSH R13 # asm_function:84 PUSH R13 +Xx_1835: PUSH R14 # asm_function:84 PUSH R14 +Xx_1836: PUSH R15 # asm_function:84 PUSH R15 +Xx_1837: MOV RAX, RSP # asm_function:96 MOV RAX, RSP +Xx_1838: AND RSP, -64 # asm_function:97 AND RSP, -64 +Xx_1839: SUB RSP, 64 # asm_function:98 SUB RSP, 64 +Xx_1840: MOV [RSP], RAX # asm_function:99 MOV [RSP], RAX +Xx_1841: MOV [RSP+-0x400], RDI # compile_asm_gcd_unsigned:202 MOV `spill_data_addr, `data_addr +Xx_1842: MOV RBX, [RDI+0x0] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x0] +Xx_1843: MOV [RSP+-0x3f8], RBX 
# compile_asm_gcd_unsigned:206 MOV [RSP+-0x3f8], `tmp +Xx_1844: MOV RBX, [RDI+0x8] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x8] +Xx_1845: MOV [RSP+-0x3f0], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3f0], `tmp +Xx_1846: MOV RBX, [RDI+0x10] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x10] +Xx_1847: MOV [RSP+-0x3e8], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3e8], `tmp +Xx_1848: MOV RBX, [RDI+0x18] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x18] +Xx_1849: MOV [RSP+-0x3e0], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3e0], `tmp +Xx_1850: MOV RBX, [RDI+0x20] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x20] +Xx_1851: MOV [RSP+-0x3d8], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3d8], `tmp +Xx_1852: MOV RBX, [RDI+0x28] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x28] +Xx_1853: MOV [RSP+-0x3d0], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3d0], `tmp +Xx_1854: MOV RBX, [RDI+0x30] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x30] +Xx_1855: MOV [RSP+-0x3c8], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3c8], `tmp +Xx_1856: MOV RBX, [RDI+0x38] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x38] +Xx_1857: MOV [RSP+-0x3c0], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3c0], `tmp +Xx_1858: MOV RBX, [RDI+0x40] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x40] +Xx_1859: MOV [RSP+-0x3b8], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3b8], `tmp +Xx_1860: MOV RBX, [RDI+0x48] # compile_asm_gcd_unsigned:205 MOV `tmp, [`data_addr+0x48] +Xx_1861: MOV [RSP+-0x3b0], RBX # compile_asm_gcd_unsigned:206 MOV [RSP+-0x3b0], `tmp +Xx_1862: MOV QWORD PTR [RSP+-0x3b8], -1 # gcd_unsigned:415 MOV QWORD PTR `spill_iter, -1 +Xx_1863: JMP _label_31 # gcd_unsigned:421 JMP _label_31 +Xx_1864: _label_32: # gcd_unsigned:423 _label_32: +Xx_1865: MOV QWORD PTR [RSP+-0x3a8], 1 # gcd_128:107 MOV QWORD PTR `spill_u_0, 1 +Xx_1866: MOV QWORD PTR [RSP+-0x3a0], 0 # gcd_128:108 MOV QWORD PTR `spill_u_1, 0 +Xx_1867: 
MOV QWORD PTR [RSP+-0x398], 0 # gcd_128:109 MOV QWORD PTR `spill_v_0, 0 +Xx_1868: MOV QWORD PTR [RSP+-0x390], 1 # gcd_128:110 MOV QWORD PTR `spill_v_1, 1 +Xx_1869: MOV QWORD PTR [RSP+-0x388], 0 # gcd_128:111 MOV QWORD PTR `spill_parity, 0 +Xx_1870: MOV QWORD PTR [RSP+-0x348], 0x3 # gcd_128:112 MOV QWORD PTR `spill_iter, 0x3 +Xx_1871: JMP _label_38 # gcd_128:120 JMP _label_38 +Xx_1872: _label_35: # gcd_128:122 _label_35: +Xx_1873: MOV R12, [RIP+_label_3] # gcd_base_continued_fraction:141 MOV `c_table_delta_minus_1, [RIP+_label_3] +Xx_1874: MOV R11, 0x5 # gcd_base_continued_fraction:146 MOV `loop_counter, 0x5 +Xx_1875: MOVAPD XMM1, [RIP+_label_6] # gcd_base_continued_fraction:148 MOVAPD `u, [RIP+_label_6] +Xx_1876: MOVAPD XMM2, [RIP+_label_7] # gcd_base_continued_fraction:149 MOVAPD `v, [RIP+_label_7] +Xx_1877: MOVAPD XMM14, [RIP+_label_8] # gcd_base_continued_fraction:150 MOVAPD `range_check_range, [RIP+_label_8] +Xx_1878: MOVAPD XMM15, [RIP+_label_9] # gcd_base_continued_fraction:151 MOVAPD `double_abs_mask, [RIP+_label_9] +Xx_1879: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:154 MOVAPD `tmp, `ab +Xx_1880: SHUFPD XMM9, XMM9, 3 # gcd_base_continued_fraction:155 SHUFPD `tmp, `tmp, 3 +Xx_1881: MOVAPD XMM13, XMM0 # gcd_base_continued_fraction:156 MOVAPD `q, `ab +Xx_1882: DIVSD XMM13, XMM9 # gcd_base_continued_fraction:157 DIVSD `q, `tmp +Xx_1883: _label_40: # gcd_base_continued_fraction:160 _label_40: +Xx_1884: #gcd_base loop start # gcd_base_continued_fraction:166 #gcd_base loop start +Xx_1885: MOVQ R8, XMM13 # gcd_base_continued_fraction:169 MOVQ `q_scalar, `q +Xx_1886: MOV R9, R8 # gcd_base_continued_fraction:170 MOV `q_scalar_2, `q_scalar +Xx_1887: MOV R10, R8 # gcd_base_continued_fraction:171 MOV `q_scalar_3, `q_scalar +Xx_1888: SHR R8, 0x28 # gcd_base_continued_fraction:175 SHR `q_scalar, 0x28 +Xx_1889: AND R8, -32 # gcd_base_continued_fraction:176 AND `q_scalar, -32 +Xx_1890: SUB R8, 0x3ff000 # gcd_base_continued_fraction:181 SUB `q_scalar, 0x3ff000 
+Xx_1891: JB _label_41 # gcd_base_continued_fraction:182 JB _label_41 +Xx_1892: CMP R8, 0x8000 # gcd_base_continued_fraction:183 CMP `q_scalar, 0x8000 +Xx_1893: JAE _label_41 # gcd_base_continued_fraction:184 JAE _label_41 +Xx_1894: LEA RSI,[RIP+gcd_base_table] # gcd_base_continued_fraction:189 LEA RSI,[RIP+gcd_base_table] +Xx_1895: MOVAPD XMM5, [R8+RSI] # gcd_base_continued_fraction:190 MOVAPD `m_0, [`q_scalar+RSI] +Xx_1896: MOVAPD XMM6, [16+R8+RSI] # gcd_base_continued_fraction:191 MOVAPD `m_1, [16+`q_scalar+RSI] +Xx_1897: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:199 MOVAPD `tmp, `ab +Xx_1898: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:200 CMPLEPD `tmp, `ab_threshold +Xx_1899: PTEST XMM9, XMM9 # gcd_base_continued_fraction:201 PTEST `tmp, `tmp +Xx_1900: JNZ _label_39 # gcd_base_continued_fraction:202 JNZ _label_39 +Xx_1901: AND R9, R12 # gcd_base_continued_fraction:205 AND `q_scalar_2, `c_table_delta_minus_1 +Xx_1902: JZ _label_41 # gcd_base_continued_fraction:206 JZ _label_41 +Xx_1903: CMP R9, R12 # gcd_base_continued_fraction:207 CMP `q_scalar_2, `c_table_delta_minus_1 +Xx_1904: JE _label_41 # gcd_base_continued_fraction:208 JE _label_41 +Xx_1905: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1906: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1907: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1908: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1909: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1910: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1911: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1912: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1913: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1914: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1915: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1916: ADDPD XMM7, XMM10 
# dot_product_exact:55 ADDPD `v, `b_1 +Xx_1917: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1918: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1919: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1920: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1921: MOVAPD XMM8, XMM7 # gcd_base_continued_fraction:226 MOVAPD `new_ab_1, `new_ab +Xx_1922: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_1923: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_1924: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_1925: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_1926: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_1927: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_1928: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_1929: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1930: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1931: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1932: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1933: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1934: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1935: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1936: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1937: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1938: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1939: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1940: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1941: MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1942: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1943: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1944: 
PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1945: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1946: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD `tmp, `v +Xx_1947: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_1948: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 MOVAPD `tmp2, `v +Xx_1949: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_1950: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1951: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1952: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1953: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1954: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1955: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1956: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1957: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1958: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1959: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1960: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1961: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1962: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_1963: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1964: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1965: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1966: JNZ _label_41 # range_check:31 JNZ _label_41 +Xx_1967: UCOMISD XMM7, XMM4 # gcd_base_continued_fraction:260 UCOMISD `new_ab, `ab_threshold +Xx_1968: JBE _label_41 # gcd_base_continued_fraction:261 JBE _label_41 +Xx_1969: JMP _label_42 # gcd_base_continued_fraction:264 JMP _label_42 +Xx_1970: _label_41: # gcd_base_continued_fraction:266 _label_41: +Xx_1971: #gcd_base no table # gcd_base_continued_fraction:267 #gcd_base no table +Xx_1972: MOVAPD XMM9, XMM0 # 
gcd_base_continued_fraction:272 MOVAPD `tmp, `ab +Xx_1973: CMPLEPD XMM9, XMM4 # gcd_base_continued_fraction:273 CMPLEPD `tmp, `ab_threshold +Xx_1974: PTEST XMM9, XMM9 # gcd_base_continued_fraction:274 PTEST `tmp, `tmp +Xx_1975: JNZ _label_39 # gcd_base_continued_fraction:275 JNZ _label_39 +Xx_1976: MOVQ XMM13, R10 # gcd_base_continued_fraction:278 MOVQ `q, `q_scalar_3 +Xx_1977: ROUNDSD XMM13, XMM13, 1 # gcd_base_continued_fraction:282 ROUNDSD `q, `q, 1 +Xx_1978: MOVAPD XMM5, [RIP+_label_7] # gcd_base_continued_fraction:288 MOVAPD `m_0, [RIP+_label_7] +Xx_1979: MOVAPD XMM6, XMM5 # gcd_base_continued_fraction:289 MOVAPD `m_1, `m_0 +Xx_1980: SUBSD XMM6, XMM13 # gcd_base_continued_fraction:290 SUBSD `m_1, `q +Xx_1981: SHUFPD XMM6, XMM6, 1 # gcd_base_continued_fraction:291 SHUFPD `m_1, `m_1, 1 +Xx_1982: MOVAPD XMM9, XMM0 # gcd_base_continued_fraction:214 MOVAPD `tmp, `ab +Xx_1983: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:215 SHUFPD `tmp, `tmp, 0 +Xx_1984: MOVAPD XMM10, XMM0 # gcd_base_continued_fraction:217 MOVAPD `tmp2, `ab +Xx_1985: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:218 SHUFPD `tmp2, `tmp2, 3 +Xx_1986: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_1987: MOVAPD XMM7, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_1988: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1989: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1990: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_1991: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_1992: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_1993: ADDPD XMM7, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_1994: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_1995: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_1996: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_1997: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_1998: MOVAPD XMM8, XMM7 # gcd_base_continued_fraction:226 MOVAPD 
`new_ab_1, `new_ab +Xx_1999: SHUFPD XMM8, XMM8, 3 # gcd_base_continued_fraction:227 SHUFPD `new_ab_1, `new_ab_1, 3 +Xx_2000: MOVAPD XMM13, XMM7 # gcd_base_continued_fraction:231 MOVAPD `q, `new_ab +Xx_2001: DIVSD XMM13, XMM8 # gcd_base_continued_fraction:232 DIVSD `q, `new_ab_1 +Xx_2002: MOVAPD XMM9, XMM1 # gcd_base_continued_fraction:234 MOVAPD `tmp, `u +Xx_2003: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:235 SHUFPD `tmp, `tmp, 0 +Xx_2004: MOVAPD XMM10, XMM1 # gcd_base_continued_fraction:237 MOVAPD `tmp2, `u +Xx_2005: SHUFPD XMM10, XMM10, 3 # gcd_base_continued_fraction:238 SHUFPD `tmp2, `tmp2, 3 +Xx_2006: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_2007: MOVAPD XMM11, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_2008: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2009: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2010: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_2011: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2012: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_2013: ADDPD XMM11, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_2014: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2015: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2016: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_2017: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2018: MOVAPD XMM9, XMM11 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_2019: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2020: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2021: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_2022: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2023: MOVAPD XMM9, XMM2 # gcd_base_continued_fraction:246 MOVAPD `tmp, `v +Xx_2024: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:247 SHUFPD `tmp, `tmp, 0 +Xx_2025: MOVAPD XMM10, XMM2 # gcd_base_continued_fraction:249 MOVAPD `tmp2, `v +Xx_2026: SHUFPD 
XMM10, XMM10, 3 # gcd_base_continued_fraction:250 SHUFPD `tmp2, `tmp2, 3 +Xx_2027: MULPD XMM9, XMM5 # dot_product_exact:46 MULPD `b_0, `a_0 +Xx_2028: MOVAPD XMM12, XMM9 # dot_product_exact:47 MOVAPD `v, `b_0 +Xx_2029: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2030: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2031: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_2032: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2033: MULPD XMM10, XMM6 # dot_product_exact:54 MULPD `b_1, `a_1 +Xx_2034: ADDPD XMM12, XMM10 # dot_product_exact:55 ADDPD `v, `b_1 +Xx_2035: ANDPD XMM10, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2036: CMPNLEPD XMM10, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2037: PTEST XMM10, XMM10 # range_check:30 PTEST `tmp, `tmp +Xx_2038: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2039: MOVAPD XMM9, XMM12 # dot_product_exact:60 MOVAPD `b_0, `v +Xx_2040: ANDPD XMM9, XMM15 # range_check:21 ANDPD `tmp, `double_abs_mask +Xx_2041: CMPNLEPD XMM9, XMM14 # range_check:24 CMPNLEPD `tmp, `range +Xx_2042: PTEST XMM9, XMM9 # range_check:30 PTEST `tmp, `tmp +Xx_2043: JNZ _label_39 # range_check:31 JNZ _label_39 +Xx_2044: _label_42: # gcd_base_continued_fraction:296 _label_42: +Xx_2045: #gcd_base end no table # gcd_base_continued_fraction:297 #gcd_base end no table +Xx_2046: MOVAPD XMM5, XMM11 # gcd_base_continued_fraction:322 MOVAPD `m_0, `new_u +Xx_2047: SHUFPD XMM5, XMM12, 0 # gcd_base_continued_fraction:323 SHUFPD `m_0, `new_v, 0 +Xx_2048: MOVAPD XMM6, XMM11 # gcd_base_continued_fraction:326 MOVAPD `m_1, `new_u +Xx_2049: SHUFPD XMM6, XMM12, 3 # gcd_base_continued_fraction:327 SHUFPD `m_1, `new_v, 3 +Xx_2050: MOVAPD XMM9, XMM7 # gcd_base_continued_fraction:330 MOVAPD `tmp, `new_ab +Xx_2051: SHUFPD XMM9, XMM9, 0 # gcd_base_continued_fraction:331 SHUFPD `tmp, `tmp, 0 +Xx_2052: SUBPD XMM9, XMM8 # gcd_base_continued_fraction:334 SUBPD `tmp, `new_ab_1 +Xx_2053: ADDPD XMM9, XMM5 # 
gcd_base_continued_fraction:337 ADDPD `tmp, `m_0 +Xx_2054: CMPLTPD XMM9, XMM6 # gcd_base_continued_fraction:340 CMPLTPD `tmp, `m_1 +Xx_2055: XORPD XMM6, [RIP+_label_12] # gcd_base_continued_fraction:343 XORPD `m_1, [RIP+_label_12] +Xx_2056: CMPLTPD XMM8, XMM6 # gcd_base_continued_fraction:346 CMPLTPD `new_ab_1, `m_1 +Xx_2057: ORPD XMM9, XMM8 # gcd_base_continued_fraction:350 ORPD `tmp, `new_ab_1 +Xx_2058: ANDPD XMM9, XMM3 # gcd_base_continued_fraction:351 ANDPD `tmp, `is_lehmer +Xx_2059: PTEST XMM9, XMM9 # gcd_base_continued_fraction:352 PTEST `tmp, `tmp +Xx_2060: JNZ _label_39 # gcd_base_continued_fraction:353 JNZ _label_39 +Xx_2061: MOVAPD XMM0, XMM7 # gcd_base_continued_fraction:355 MOVAPD `ab, `new_ab +Xx_2062: MOVAPD XMM1, XMM11 # gcd_base_continued_fraction:356 MOVAPD `u, `new_u +Xx_2063: MOVAPD XMM2, XMM12 # gcd_base_continued_fraction:357 MOVAPD `v, `new_v +Xx_2064: DEC R11 # gcd_base_continued_fraction:360 DEC `loop_counter +Xx_2065: JNZ _label_40 # gcd_base_continued_fraction:361 JNZ _label_40 +Xx_2066: #gcd_base loop end # gcd_base_continued_fraction:363 #gcd_base loop end +Xx_2067: _label_39: # gcd_base_continued_fraction:368 _label_39: +Xx_2068: CMP R11, 0x5 # gcd_base_continued_fraction:370 CMP `loop_counter, 0x5 +Xx_2069: JE _label_36 # gcd_base_continued_fraction:371 JE _label_36 +Xx_2070: MOVAPD XMM8, [RIP+_label_9] # gcd_128:175 MOVAPD `double_abs_mask, [RIP+_label_9] +Xx_2071: MOVAPD XMM5, XMM1 # gcd_128:203 MOVAPD `tmp_a, `vector_u +Xx_2072: PAND XMM5, XMM8 # gcd_128:189 PAND `tmp_a, `double_abs_mask +Xx_2073: CVTTSD2SI R8, XMM5 # gcd_128:197 CVTTSD2SI `m_0_0, `tmp_a +Xx_2074: SHUFPD XMM5, XMM5, 3 # gcd_128:206 SHUFPD `tmp_a, `tmp_a, 3 +Xx_2075: CVTTSD2SI R10, XMM5 # gcd_128:197 CVTTSD2SI `m_1_0, `tmp_a +Xx_2076: MOVAPD XMM5, XMM2 # gcd_128:211 MOVAPD `tmp_a, `vector_v +Xx_2077: PAND XMM5, XMM8 # gcd_128:189 PAND `tmp_a, `double_abs_mask +Xx_2078: CVTTSD2SI R9, XMM5 # gcd_128:197 CVTTSD2SI `m_0_1, `tmp_a +Xx_2079: SHUFPD XMM5, XMM5, 3 # 
gcd_128:214 SHUFPD `tmp_a, `tmp_a, 3 +Xx_2080: CVTTSD2SI R11, XMM5 # gcd_128:197 CVTTSD2SI `m_1_1, `tmp_a +Xx_2081: MOV R12, [RSP+-0x3a8] # gcd_128:217 MOV `tmp_0, `spill_u_0 +Xx_2082: MOV R13, [RSP+-0x3a0] # gcd_128:218 MOV `tmp_1, `spill_u_1 +Xx_2083: MOV RAX, R8 # dot_product_exact:56 MOV RAX, `a_0 +Xx_2084: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_2085: JC _label_36 # dot_product_exact:58 JC _label_36 +Xx_2086: MOV RBX, RAX # dot_product_exact:59 MOV `out, RAX +Xx_2087: MOV RAX, R9 # dot_product_exact:62 MOV RAX, `a_1 +Xx_2088: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_2089: JC _label_36 # dot_product_exact:64 JC _label_36 +Xx_2090: ADD RBX, RAX # dot_product_exact:67 ADD `out, RAX +Xx_2091: JC _label_36 # dot_product_exact:68 JC _label_36 +Xx_2092: MOV RAX, R10 # dot_product_exact:56 MOV RAX, `a_0 +Xx_2093: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_2094: JC _label_36 # dot_product_exact:58 JC _label_36 +Xx_2095: MOV RBP, RAX # dot_product_exact:59 MOV `out, RAX +Xx_2096: MOV RAX, R11 # dot_product_exact:62 MOV RAX, `a_1 +Xx_2097: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_2098: JC _label_36 # dot_product_exact:64 JC _label_36 +Xx_2099: ADD RBP, RAX # dot_product_exact:67 ADD `out, RAX +Xx_2100: JC _label_36 # dot_product_exact:68 JC _label_36 +Xx_2101: MOV R12, [RSP+-0x398] # gcd_128:222 MOV `tmp_0, `spill_v_0 +Xx_2102: MOV R13, [RSP+-0x390] # gcd_128:223 MOV `tmp_1, `spill_v_1 +Xx_2103: MOV RAX, R8 # dot_product_exact:56 MOV RAX, `a_0 +Xx_2104: MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_2105: JC _label_36 # dot_product_exact:58 JC _label_36 +Xx_2106: MOV RSI, RAX # dot_product_exact:59 MOV `out, RAX +Xx_2107: MOV RAX, R9 # dot_product_exact:62 MOV RAX, `a_1 +Xx_2108: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_2109: JC _label_36 # dot_product_exact:64 JC _label_36 +Xx_2110: ADD RSI, RAX # dot_product_exact:67 ADD `out, RAX +Xx_2111: JC _label_36 # dot_product_exact:68 JC _label_36 +Xx_2112: MOV RAX, R10 # dot_product_exact:56 MOV RAX, `a_0 +Xx_2113: 
MUL R12 # dot_product_exact:57 MUL `b_0 +Xx_2114: JC _label_36 # dot_product_exact:58 JC _label_36 +Xx_2115: MOV RDI, RAX # dot_product_exact:59 MOV `out, RAX +Xx_2116: MOV RAX, R11 # dot_product_exact:62 MOV RAX, `a_1 +Xx_2117: MUL R13 # dot_product_exact:63 MUL `b_1 +Xx_2118: JC _label_36 # dot_product_exact:64 JC _label_36 +Xx_2119: ADD RDI, RAX # dot_product_exact:67 ADD `out, RAX +Xx_2120: JC _label_36 # dot_product_exact:68 JC _label_36 +Xx_2121: MOV R13, [RSP+-0x378] # gcd_128:249 MOV `ab_start_0_0, `spill_ab_start_0_0 +Xx_2122: MOV R14, [RSP+-0x370] # gcd_128:250 MOV `ab_start_0_1, `spill_ab_start_0_1 +Xx_2123: MOV R15, [RSP+-0x368] # gcd_128:251 MOV `ab_start_1_0, `spill_ab_start_1_0 +Xx_2124: MOV RCX, [RSP+-0x360] # gcd_128:252 MOV `ab_start_1_1, `spill_ab_start_1_1 +Xx_2125: MOVAPD XMM5, XMM2 # gcd_128:256 MOVAPD `tmp_a, `vector_v +Xx_2126: SHUFPD XMM5, XMM5, 3 # gcd_128:257 SHUFPD `tmp_a, `tmp_a, 3 +Xx_2127: MOVQ RAX, XMM5 # gcd_128:258 MOVQ RAX, `tmp_a +Xx_2128: SHR RAX, 63 # gcd_128:259 SHR RAX, 63 +Xx_2129: MOV R12, [RSP+-0x388] # gcd_128:262 MOV `new_parity, `spill_parity +Xx_2130: XOR R12, RAX # gcd_128:263 XOR `new_parity, RAX +Xx_2131: MOV RAX, R13 # gcd_128:268 MOV RAX, `ab_start_0_0 +Xx_2132: MUL RBX # gcd_128:269 MUL `new_u_0 +Xx_2133: MOV R8, RAX # gcd_128:272 MOV `new_ab_0_0, RAX +Xx_2134: MOV R9, RDX # gcd_128:273 MOV `new_ab_0_1, RDX +Xx_2135: MOV RAX, R14 # gcd_128:276 MOV RAX, `ab_start_0_1 +Xx_2136: MUL RBX # gcd_128:277 MUL `new_u_0 +Xx_2137: ADD R9, RAX # gcd_128:280 ADD `new_ab_0_1, RAX +Xx_2138: MOV RAX, R15 # gcd_128:283 MOV RAX, `ab_start_1_0 +Xx_2139: MUL RSI # gcd_128:284 MUL `new_v_0 +Xx_2140: SUB R8, RAX # gcd_128:287 SUB `new_ab_0_0, RAX +Xx_2141: SBB R9, RDX # gcd_128:288 SBB `new_ab_0_1, RDX +Xx_2142: MOV RAX, RCX # gcd_128:291 MOV RAX, `ab_start_1_1 +Xx_2143: MUL RSI # gcd_128:292 MUL `new_v_0 +Xx_2144: SUB R9, RAX # gcd_128:295 SUB `new_ab_0_1, RAX +Xx_2145: MOV RAX, R15 # gcd_128:268 MOV RAX, `ab_start_1_0 +Xx_2146: MUL 
RDI # gcd_128:269 MUL `new_v_1 +Xx_2147: MOV R10, RAX # gcd_128:272 MOV `new_ab_1_0, RAX +Xx_2148: MOV R11, RDX # gcd_128:273 MOV `new_ab_1_1, RDX +Xx_2149: MOV RAX, RCX # gcd_128:276 MOV RAX, `ab_start_1_1 +Xx_2150: MUL RDI # gcd_128:277 MUL `new_v_1 +Xx_2151: ADD R11, RAX # gcd_128:280 ADD `new_ab_1_1, RAX +Xx_2152: MOV RAX, R13 # gcd_128:283 MOV RAX, `ab_start_0_0 +Xx_2153: MUL RBP # gcd_128:284 MUL `new_u_1 +Xx_2154: SUB R10, RAX # gcd_128:287 SUB `new_ab_1_0, RAX +Xx_2155: SBB R11, RDX # gcd_128:288 SBB `new_ab_1_1, RDX +Xx_2156: MOV RAX, R14 # gcd_128:291 MOV RAX, `ab_start_0_1 +Xx_2157: MUL RBP # gcd_128:292 MUL `new_u_1 +Xx_2158: SUB R11, RAX # gcd_128:295 SUB `new_ab_1_1, RAX +Xx_2159: MOV RAX, -1 # gcd_128:326 MOV RAX, -1 +Xx_2160: ADD RAX, R12 # gcd_128:327 ADD RAX, `new_parity +Xx_2161: NOT RAX # gcd_128:328 NOT RAX +Xx_2162: XOR R8, RAX # gcd_128:333 XOR `new_ab_0_0, RAX +Xx_2163: XOR R9, RAX # gcd_128:334 XOR `new_ab_0_1, RAX +Xx_2164: ADD R8, R12 # gcd_128:337 ADD `new_ab_0_0, `new_parity +Xx_2165: ADC R9, 0 # gcd_128:338 ADC `new_ab_0_1, 0 +Xx_2166: XOR R10, RAX # gcd_128:333 XOR `new_ab_1_0, RAX +Xx_2167: XOR R11, RAX # gcd_128:334 XOR `new_ab_1_1, RAX +Xx_2168: ADD R10, R12 # gcd_128:337 ADD `new_ab_1_0, `new_parity +Xx_2169: ADC R11, 0 # gcd_128:338 ADC `new_ab_1_1, 0 +Xx_2170: MOV RAX, [RSP+-0x380] # gcd_128:366 MOV `is_lehmer, `spill_is_lehmer +Xx_2171: MOV R15, R8 # gcd_128:369 MOV `ab_delta_0, `new_ab_0_0 +Xx_2172: MOV RCX, R9 # gcd_128:370 MOV `ab_delta_1, `new_ab_0_1 +Xx_2173: SUB R15, R10 # gcd_128:371 SUB `ab_delta_0, `new_ab_1_0 +Xx_2174: SBB RCX, R11 # gcd_128:372 SBB `ab_delta_1, `new_ab_1_1 +Xx_2175: CMP R12, 0 # gcd_128:391 CMP `new_parity, 0 +Xx_2176: MOV R13, RBP # gcd_128:392 MOV `ab_threshold_0, `new_u_1 +Xx_2177: CMOVE R13, RDI # gcd_128:393 CMOVE `ab_threshold_0, `new_v_1 +Xx_2178: MOV R14, RBX # gcd_128:397 MOV `ab_threshold_1, `new_u_0 +Xx_2179: CMOVE R14, RSI # gcd_128:398 CMOVE `ab_threshold_1, `new_v_0 +Xx_2180: MOV RDX, 
RDI # gcd_128:401 MOV `b_new_min, `new_v_1 +Xx_2181: CMOVE RDX, RBP # gcd_128:402 CMOVE `b_new_min, `new_u_1 +Xx_2182: CMP RAX, 0 # gcd_128:405 CMP `is_lehmer, 0 +Xx_2183: CMOVE R13, RAX # gcd_128:406 CMOVE `ab_threshold_0, `is_lehmer +Xx_2184: CMOVE R14, RAX # gcd_128:407 CMOVE `ab_threshold_1, `is_lehmer +Xx_2185: CMOVE RDX, RAX # gcd_128:410 CMOVE `b_new_min, `is_lehmer +Xx_2186: ADD R13, R14 # gcd_128:413 ADD `ab_threshold_0, `ab_threshold_1 +Xx_2187: MOV R14, 0 # gcd_128:414 MOV `ab_threshold_1, 0 +Xx_2188: ADC R14, 0 # gcd_128:415 ADC `ab_threshold_1, 0 +Xx_2189: SUB R15, R13 # gcd_128:420 SUB `ab_delta_0, `ab_threshold_0 +Xx_2190: SBB RCX, R14 # gcd_128:421 SBB `ab_delta_1, `ab_threshold_1 +Xx_2191: JC _label_36 # gcd_128:422 JC _label_36 +Xx_2192: CMP R10, RDX # gcd_128:426 CMP `new_ab_1_0, `b_new_min +Xx_2193: MOV RDX, R11 # gcd_128:427 MOV `b_new_min, `new_ab_1_1 +Xx_2194: SBB RDX, 0 # gcd_128:428 SBB `b_new_min, 0 +Xx_2195: JC _label_36 # gcd_128:429 JC _label_36 +Xx_2196: MOV R13, [RSP+-0x358] # gcd_128:434 MOV `ab_threshold_0, `spill_ab_threshold_0 +Xx_2197: MOV R14, [RSP+-0x350] # gcd_128:435 MOV `ab_threshold_1, `spill_ab_threshold_1 +Xx_2198: MOV R15, R13 # gcd_128:438 MOV `ab_delta_0, `ab_threshold_0 +Xx_2199: MOV RCX, R14 # gcd_128:439 MOV `ab_delta_1, `ab_threshold_1 +Xx_2200: SUB R15, R8 # gcd_128:440 SUB `ab_delta_0, `new_ab_0_0 +Xx_2201: SBB RCX, R9 # gcd_128:441 SBB `ab_delta_1, `new_ab_0_1 +Xx_2202: JNC _label_36 # gcd_128:442 JNC _label_36 +Xx_2203: MOV [RSP+-0x3a8], RBX # gcd_128:445 MOV `spill_u_0, `new_u_0 +Xx_2204: MOV [RSP+-0x3a0], RBP # gcd_128:446 MOV `spill_u_1, `new_u_1 +Xx_2205: MOV [RSP+-0x398], RSI # gcd_128:449 MOV `spill_v_0, `new_v_0 +Xx_2206: MOV [RSP+-0x390], RDI # gcd_128:450 MOV `spill_v_1, `new_v_1 +Xx_2207: MOV [RSP+-0x388], R12 # gcd_128:453 MOV `spill_parity, `new_parity +Xx_2208: MOV R15, [RSP+-0x348] # gcd_128:459 MOV `ab_delta_0, `spill_iter +Xx_2209: DEC R15 # gcd_128:460 DEC `ab_delta_0 +Xx_2210: MOV 
[RSP+-0x348], R15 # gcd_128:461 MOV `spill_iter, `ab_delta_0 +Xx_2211: JZ _label_37 # gcd_128:462 JZ _label_37 +Xx_2212: _label_34: # gcd_128:465 _label_34: +Xx_2213: XOR RCX, RCX # gcd_128:492 XOR `tmp_3, `tmp_3 +Xx_2214: MOV RAX, R10 # gcd_128:495 MOV `tmp_0, `ab_1_0 +Xx_2215: MOV RDX, R11 # gcd_128:496 MOV `tmp_1, `ab_1_1 +Xx_2216: SUB RAX, R13 # gcd_128:497 SUB `tmp_0, `ab_threshold_0 +Xx_2217: SBB RDX, R14 # gcd_128:498 SBB `tmp_1, `ab_threshold_1 +Xx_2218: JC _label_36 # gcd_128:501 JC _label_36 +Xx_2219: MOV R15, RAX # gcd_128:504 MOV `tmp_2, `tmp_0 +Xx_2220: OR R15, RDX # gcd_128:505 OR `tmp_2, `tmp_1 +Xx_2221: JZ _label_36 # gcd_128:506 JZ _label_36 +Xx_2222: MOV RAX, R9 # gcd_128:511 MOV `tmp_0, `ab_0_1 +Xx_2223: MOV RDX, 64 # gcd_128:512 MOV `tmp_1, 64 +Xx_2224: CMP R9, 0 # gcd_128:513 CMP `ab_0_1, 0 +Xx_2225: JNE _label_43 # gcd_128:516 JNE _label_43 +Xx_2226: MOV RAX, R8 # gcd_128:517 MOV `tmp_0, `ab_0_0 +Xx_2227: _label_43: # gcd_128:518 _label_43: +Xx_2228: JNE _label_44 # gcd_128:521 JNE _label_44 +Xx_2229: MOV RDX, RCX # gcd_128:522 MOV `tmp_1, `tmp_3 +Xx_2230: _label_44: # gcd_128:523 _label_44: +Xx_2231: BSR RAX, RAX # gcd_128:530 BSR `tmp_0, `tmp_0 +Xx_2232: ADD RDX, RAX # gcd_128:533 ADD `tmp_1, `tmp_0 +Xx_2233: INC RDX # gcd_128:534 INC `tmp_1 +Xx_2234: XOR RAX, RAX # gcd_128:545 XOR `tmp_0, `tmp_0 +Xx_2235: MOV R15, [RSP+-0x380] # gcd_128:546 MOV `tmp_2, `spill_is_lehmer +Xx_2236: CMP R15, 0 # gcd_128:547 CMP `tmp_2, 0 +Xx_2237: MOV RCX, 96 # gcd_128:548 MOV `tmp_3, 96 +Xx_2238: CMOVNE RAX, RCX # gcd_128:549 CMOVNE `tmp_0, `tmp_3 +Xx_2239: XOR RCX, RCX # gcd_128:550 XOR `tmp_3, `tmp_3 +Xx_2240: CMP RDX, RAX # gcd_128:553 CMP `tmp_1, `tmp_0 +Xx_2241: CMOVB RDX, RAX # gcd_128:554 CMOVB `tmp_1, `tmp_0 +Xx_2242: SUB RDX, 0x32 # gcd_128:562 SUB `tmp_1, 0x32 +Xx_2243: CMOVB RDX, RCX # gcd_128:566 CMOVB `tmp_1, `tmp_3 +Xx_2244: OR R15, RDX # gcd_128:569 OR `tmp_2, `tmp_1 +Xx_2245: LEA RCX, [RIP+_label_28] # gcd_128:572 LEA `tmp_3, [RIP+_label_28] 
+Xx_2246: LEA RAX, [RIP+_label_29] # gcd_128:573 LEA `tmp_0, [RIP+_label_29] +Xx_2247: CMOVZ RAX, RCX # gcd_128:587 CMOVZ `tmp_0, `tmp_3 +Xx_2248: MOVAPD XMM3, [RAX] # gcd_128:588 MOVAPD `vector_is_lehmer, [`tmp_0] +Xx_2249: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_2250: MOV RBP, R10 # shift_right:24 MOV `res, `v_0 +Xx_2251: SHRD RBP, R11, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_2252: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_2253: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_2254: CMOVAE RBP, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_2255: CMOVAE R15, R11 # shift_right:36 CMOVAE `res_2, `v_1 +Xx_2256: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_2257: OR RBP, R15 # shift_right:42 OR `res, `res_2 +Xx_2258: CVTSI2SD XMM0, RBP # gcd_128:604 CVTSI2SD `vector_ab, `new_u_1 +Xx_2259: SHUFPD XMM0, XMM0, 0 # gcd_128:609 SHUFPD `vector_ab, `vector_ab, 0 +Xx_2260: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_2261: MOV RBX, R8 # shift_right:24 MOV `res, `v_0 +Xx_2262: SHRD RBX, R9, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_2263: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_2264: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_2265: CMOVAE RBX, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_2266: CMOVAE R15, R9 # shift_right:36 CMOVAE `res_2, `v_1 +Xx_2267: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_2268: OR RBX, R15 # shift_right:42 OR `res, `res_2 +Xx_2269: CVTSI2SD XMM0, RBX # gcd_128:616 CVTSI2SD `vector_ab, `new_u_0 +Xx_2270: MOV RCX, RDX # shift_right:23 MOV RCX, `amount +Xx_2271: MOV RSI, R13 # shift_right:24 MOV `res, `v_0 +Xx_2272: SHRD RSI, R14, CL # shift_right:25 SHRD `res, `v_1, CL +Xx_2273: XOR R15, R15 # shift_right:28 XOR `res_2, `res_2 +Xx_2274: SUB RCX, 64 # shift_right:31 SUB RCX, 64 +Xx_2275: CMOVAE RSI, R15 # shift_right:35 CMOVAE `res, `res_2 +Xx_2276: CMOVAE R15, R14 # shift_right:36 CMOVAE `res_2, `v_1 +Xx_2277: SHR R15, CL # shift_right:39 SHR `res_2, CL +Xx_2278: OR RSI, R15 # shift_right:42 OR `res, 
`res_2 +Xx_2279: CVTSI2SD XMM4, RSI # gcd_128:625 CVTSI2SD `vector_ab_threshold, `new_v_0 +Xx_2280: SHUFPD XMM4, XMM4, 0 # gcd_128:626 SHUFPD `vector_ab_threshold, `vector_ab_threshold, 0 +Xx_2281: JMP _label_35 # gcd_128:630 JMP _label_35 +Xx_2282: _label_36: # gcd_128:635 _label_36: +Xx_2283: MOV R15, [RSP+-0x348] # gcd_128:643 MOV `tmp, `spill_iter +Xx_2284: CMP R15, 0x3 # gcd_128:644 CMP `tmp, 0x3 +Xx_2285: JE label_error_1 # gcd_128:645 JE label_error_1 +Xx_2286: JMP _label_37 # gcd_128:647 JMP _label_37 +Xx_2287: _label_38: # gcd_128:652 _label_38: +Xx_2288: MOV R8, [RSP+-0x378] # gcd_128:654 MOV `new_ab_0_0, `spill_ab_start_0_0 +Xx_2289: MOV R9, [RSP+-0x370] # gcd_128:655 MOV `new_ab_0_1, `spill_ab_start_0_1 +Xx_2290: MOV R10, [RSP+-0x368] # gcd_128:656 MOV `new_ab_1_0, `spill_ab_start_1_0 +Xx_2291: MOV R11, [RSP+-0x360] # gcd_128:657 MOV `new_ab_1_1, `spill_ab_start_1_1 +Xx_2292: MOV R13, [RSP+-0x358] # gcd_128:658 MOV `ab_threshold_0, `spill_ab_threshold_0 +Xx_2293: MOV R14, [RSP+-0x350] # gcd_128:659 MOV `ab_threshold_1, `spill_ab_threshold_1 +Xx_2294: JMP _label_34 # gcd_128:661 JMP _label_34 +Xx_2295: _label_37: # gcd_128:666 _label_37: +Xx_2296: .text # gcd_unsigned:446 .text +Xx_2297: .balign 8 # gcd_unsigned:450 .balign 8 +Xx_2298: _label_46: # gcd_unsigned:451 _label_46: +Xx_2299: .text # gcd_unsigned:454 .text +Xx_2300: MOV RBX, [RSP+-0x3b0] # gcd_unsigned:456 MOV `tmp, `spill_a_end_index +Xx_2301: CMP RBX, 1 # gcd_unsigned:466 CMP `tmp, 1 +Xx_2302: JE multiply_uv_size_4 # gcd_unsigned:467 JE multiply_uv_size_4 +Xx_2303: CMP RBX, 2 # gcd_unsigned:466 CMP `tmp, 2 +Xx_2304: JE multiply_uv_size_4 # gcd_unsigned:467 JE multiply_uv_size_4 +Xx_2305: CMP RBX, 3 # gcd_unsigned:466 CMP `tmp, 3 +Xx_2306: JE multiply_uv_size_4 # gcd_unsigned:467 JE multiply_uv_size_4 +Xx_2307: CMP RBX, 4 # gcd_unsigned:466 CMP `tmp, 4 +Xx_2308: JE multiply_uv_size_4 # gcd_unsigned:467 JE multiply_uv_size_4 +Xx_2309: CMP RBX, 5 # gcd_unsigned:466 CMP `tmp, 5 +Xx_2310: JE 
multiply_uv_size_8 # gcd_unsigned:467 JE multiply_uv_size_8 +Xx_2311: CMP RBX, 6 # gcd_unsigned:466 CMP `tmp, 6 +Xx_2312: JE multiply_uv_size_8 # gcd_unsigned:467 JE multiply_uv_size_8 +Xx_2313: CMP RBX, 7 # gcd_unsigned:466 CMP `tmp, 7 +Xx_2314: JE multiply_uv_size_8 # gcd_unsigned:467 JE multiply_uv_size_8 +Xx_2315: CMP RBX, 8 # gcd_unsigned:466 CMP `tmp, 8 +Xx_2316: JE multiply_uv_size_8 # gcd_unsigned:467 JE multiply_uv_size_8 +Xx_2317: CMP RBX, 9 # gcd_unsigned:466 CMP `tmp, 9 +Xx_2318: JE multiply_uv_size_12 # gcd_unsigned:467 JE multiply_uv_size_12 +Xx_2319: CMP RBX, 10 # gcd_unsigned:466 CMP `tmp, 10 +Xx_2320: JE multiply_uv_size_12 # gcd_unsigned:467 JE multiply_uv_size_12 +Xx_2321: CMP RBX, 11 # gcd_unsigned:466 CMP `tmp, 11 +Xx_2322: JE multiply_uv_size_12 # gcd_unsigned:467 JE multiply_uv_size_12 +Xx_2323: CMP RBX, 12 # gcd_unsigned:466 CMP `tmp, 12 +Xx_2324: JE multiply_uv_size_12 # gcd_unsigned:467 JE multiply_uv_size_12 +Xx_2325: CMP RBX, 13 # gcd_unsigned:466 CMP `tmp, 13 +Xx_2326: JE multiply_uv_size_16 # gcd_unsigned:467 JE multiply_uv_size_16 +Xx_2327: CMP RBX, 14 # gcd_unsigned:466 CMP `tmp, 14 +Xx_2328: JE multiply_uv_size_16 # gcd_unsigned:467 JE multiply_uv_size_16 +Xx_2329: CMP RBX, 15 # gcd_unsigned:466 CMP `tmp, 15 +Xx_2330: JE multiply_uv_size_16 # gcd_unsigned:467 JE multiply_uv_size_16 +Xx_2331: CMP RBX, 16 # gcd_unsigned:466 CMP `tmp, 16 +Xx_2332: JE multiply_uv_size_16 # gcd_unsigned:467 JE multiply_uv_size_16 +Xx_2333: CMP RBX, 17 # gcd_unsigned:466 CMP `tmp, 17 +Xx_2334: JE multiply_uv_size_20 # gcd_unsigned:467 JE multiply_uv_size_20 +Xx_2335: CMP RBX, 18 # gcd_unsigned:466 CMP `tmp, 18 +Xx_2336: JE multiply_uv_size_20 # gcd_unsigned:467 JE multiply_uv_size_20 +Xx_2337: CMP RBX, 19 # gcd_unsigned:466 CMP `tmp, 19 +Xx_2338: JE multiply_uv_size_20 # gcd_unsigned:467 JE multiply_uv_size_20 +Xx_2339: CMP RBX, 20 # gcd_unsigned:466 CMP `tmp, 20 +Xx_2340: JE multiply_uv_size_20 # gcd_unsigned:467 JE multiply_uv_size_20 +Xx_2341: 
multiply_uv_size_4: # gcd_unsigned:490 multiply_uv_size_4: +Xx_2342: MOV RDI, [RSP+-0x388] # gcd_unsigned:531 MOV `tmp, `spill_parity +Xx_2343: CMP RDI, 0 # gcd_unsigned:532 CMP `tmp, 0 +Xx_2344: MOV RBX, [RSP+-0x3a8] # gcd_unsigned:535 MOV `addr_a, `spill_u_0 +Xx_2345: MOV RBP, [RSP+-0x398] # gcd_unsigned:536 MOV `addr_b, `spill_v_0 +Xx_2346: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_2347: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_2348: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_2349: MOV [RSP+-0x348], RBX # gcd_unsigned:543 MOV `spill_mod_u_0, `addr_a +Xx_2350: MOV [RSP+-0x338], RBP # gcd_unsigned:544 MOV `spill_mod_v_0, `addr_b +Xx_2351: MOV RBX, [RSP+-0x3a0] # gcd_unsigned:535 MOV `addr_a, `spill_u_1 +Xx_2352: MOV RBP, [RSP+-0x390] # gcd_unsigned:536 MOV `addr_b, `spill_v_1 +Xx_2353: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_2354: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_2355: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_2356: MOV [RSP+-0x340], RBX # gcd_unsigned:543 MOV `spill_mod_u_1, `addr_a +Xx_2357: MOV [RSP+-0x330], RBP # gcd_unsigned:544 MOV `spill_mod_v_1, `addr_b +Xx_2358: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:547 MOV `addr_new, `spill_iter +Xx_2359: TEST RSI, 1 # gcd_unsigned:548 TEST `addr_new, 1 +Xx_2360: MOV RBX, [RSP+-0x3f8] # gcd_unsigned:551 MOV `addr_a, `spill_a_addr_base +Xx_2361: CMOVNZ RBX, [RSP+-0x3e8] # gcd_unsigned:552 CMOVNZ `addr_a, `spill_a_2_addr_base +Xx_2362: MOV RBP, [RSP+-0x3f0] # gcd_unsigned:555 MOV `addr_b, `spill_b_addr_base +Xx_2363: CMOVNZ RBP, [RSP+-0x3e0] # gcd_unsigned:556 CMOVNZ `addr_b, `spill_b_2_addr_base +Xx_2364: CMP RDI, 0 # gcd_unsigned:559 CMP `tmp, 0 +Xx_2365: MOV RSI, RBX # gcd_unsigned:560 MOV `addr_new, `addr_a +Xx_2366: CMOVNE RBX, RBP # gcd_unsigned:561 CMOVNE `addr_a, `addr_b +Xx_2367: CMOVNE RBP, RSI # gcd_unsigned:562 CMOVNE `addr_b, `addr_new +Xx_2368: MOV RSI, [RSP+-0x3b8] # 
gcd_unsigned:567 MOV `addr_new, `spill_iter +Xx_2369: TEST RSI, 1 # gcd_unsigned:568 TEST `addr_new, 1 +Xx_2370: MOV RSI, [RSP+-0x3e0] # gcd_unsigned:569 MOV `addr_new, `spill_b_2_addr_base +Xx_2371: CMOVNZ RSI, [RSP+-0x3f0] # gcd_unsigned:570 CMOVNZ `addr_new, `spill_b_addr_base +Xx_2372: MOV [RSP+-0x328], RSI # gcd_unsigned:571 MOV `spill_addr_b_new, `addr_new +Xx_2373: MOV RSI, [RSP+-0x3e8] # gcd_unsigned:574 MOV `addr_new, `spill_a_2_addr_base +Xx_2374: CMOVNZ RSI, [RSP+-0x3f8] # gcd_unsigned:575 CMOVNZ `addr_new, `spill_a_addr_base +Xx_2375: MOV RDI, [RSP+-0x338] # gcd_unsigned:604 MOV `tmp, `spill_mod_v_0 +Xx_2376: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2377: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_2378: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2379: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2380: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2381: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_2382: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2383: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2384: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2385: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_2386: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2387: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2388: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2389: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_2390: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_2391: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2392: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2393: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2394: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2395: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2396: MOV [RSP+-0x320], R11 # mul_add_slow:266 MOV [RSP+-0x320], 
`mul_low_0 +Xx_2397: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2398: MOV [RSP+-0x318], R12 # mul_add_slow:266 MOV [RSP+-0x318], `mul_low_1 +Xx_2399: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2400: MOV [RSP+-0x310], R13 # mul_add_slow:266 MOV [RSP+-0x310], `mul_low_2 +Xx_2401: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2402: MOV [RSP+-0x308], RAX # mul_add_slow:266 MOV [RSP+-0x308], `mul_low_3 +Xx_2403: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2404: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_2405: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2406: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2407: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2408: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_2409: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2410: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2411: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2412: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_2413: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2414: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2415: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2416: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_2417: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2418: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2419: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2420: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2421: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2422: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2423: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2424: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2425: MOV [RSP+-0x300], R11 # mul_add_slow:266 MOV [RSP+-0x300], `mul_low_0 +Xx_2426: NOT R12 # mul_add_slow:264 NOT `mul_low_1 
+Xx_2427: MOV [RSP+-0x2f8], R12 # mul_add_slow:266 MOV [RSP+-0x2f8], `mul_low_1 +Xx_2428: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2429: MOV [RSP+-0x2f0], R13 # mul_add_slow:266 MOV [RSP+-0x2f0], `mul_low_2 +Xx_2430: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2431: MOV [RSP+-0x2e8], RAX # mul_add_slow:266 MOV [RSP+-0x2e8], `mul_low_3 +Xx_2432: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2433: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_2434: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2435: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2436: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2437: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_2438: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2439: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2440: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2441: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_2442: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2443: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2444: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2445: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_2446: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2447: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2448: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2449: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2450: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2451: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2452: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2453: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2454: MOV [RSP+-0x2e0], R11 # mul_add_slow:266 MOV [RSP+-0x2e0], `mul_low_0 +Xx_2455: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2456: MOV [RSP+-0x2d8], R12 # mul_add_slow:266 MOV [RSP+-0x2d8], 
`mul_low_1 +Xx_2457: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2458: MOV [RSP+-0x2d0], R13 # mul_add_slow:266 MOV [RSP+-0x2d0], `mul_low_2 +Xx_2459: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2460: MOV [RSP+-0x2c8], RAX # mul_add_slow:266 MOV [RSP+-0x2c8], `mul_low_3 +Xx_2461: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2462: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_2463: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2464: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2465: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2466: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_2467: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2468: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2469: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2470: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_2471: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2472: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2473: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2474: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_2475: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2476: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2477: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2478: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2479: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2480: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2481: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2482: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2483: MOV [RSP+-0x2c0], R11 # mul_add_slow:266 MOV [RSP+-0x2c0], `mul_low_0 +Xx_2484: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2485: MOV [RSP+-0x2b8], R12 # mul_add_slow:266 MOV [RSP+-0x2b8], `mul_low_1 +Xx_2486: NOT R13 # mul_add_slow:264 NOT `mul_low_2 
+Xx_2487: MOV [RSP+-0x2b0], R13 # mul_add_slow:266 MOV [RSP+-0x2b0], `mul_low_2 +Xx_2488: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2489: MOV [RSP+-0x2a8], RAX # mul_add_slow:266 MOV [RSP+-0x2a8], `mul_low_3 +Xx_2490: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2491: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_2492: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2493: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2494: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2495: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_2496: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2497: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2498: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2499: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_2500: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2501: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2502: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2503: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_2504: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2505: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2506: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2507: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2508: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2509: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2510: MOV [RSP+-0x2a0], R11 # mul_add_slow:266 MOV [RSP+-0x2a0], `mul_low_0 +Xx_2511: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2512: MOV [RSP+-0x298], R12 # mul_add_slow:266 MOV [RSP+-0x298], `mul_low_1 +Xx_2513: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2514: MOV [RSP+-0x290], R13 # mul_add_slow:266 MOV [RSP+-0x290], `mul_low_2 +Xx_2515: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2516: MOV [RSP+-0x288], RAX # mul_add_slow:266 MOV [RSP+-0x288], `mul_low_3 
+Xx_2517: MOV RDI, [RSP+-0x340] # gcd_unsigned:608 MOV `tmp, `spill_mod_u_1 +Xx_2518: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2519: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_2520: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2521: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2522: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2523: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_2524: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2525: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2526: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2527: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_2528: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2529: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2530: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2531: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_2532: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_2533: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2534: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2535: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2536: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2537: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2538: MOV [RSP+-0x280], R11 # mul_add_slow:266 MOV [RSP+-0x280], `mul_low_0 +Xx_2539: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2540: MOV [RSP+-0x278], R12 # mul_add_slow:266 MOV [RSP+-0x278], `mul_low_1 +Xx_2541: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2542: MOV [RSP+-0x270], R13 # mul_add_slow:266 MOV [RSP+-0x270], `mul_low_2 +Xx_2543: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2544: MOV [RSP+-0x268], RAX # mul_add_slow:266 MOV [RSP+-0x268], `mul_low_3 +Xx_2545: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2546: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] 
+Xx_2547: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2548: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2549: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2550: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_2551: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2552: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2553: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2554: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_2555: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2556: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2557: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2558: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_2559: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2560: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2561: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2562: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2563: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2564: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2565: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2566: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2567: MOV [RSP+-0x260], R11 # mul_add_slow:266 MOV [RSP+-0x260], `mul_low_0 +Xx_2568: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2569: MOV [RSP+-0x258], R12 # mul_add_slow:266 MOV [RSP+-0x258], `mul_low_1 +Xx_2570: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2571: MOV [RSP+-0x250], R13 # mul_add_slow:266 MOV [RSP+-0x250], `mul_low_2 +Xx_2572: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2573: MOV [RSP+-0x248], RAX # mul_add_slow:266 MOV [RSP+-0x248], `mul_low_3 +Xx_2574: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2575: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_2576: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2577: 
MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2578: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2579: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_2580: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2581: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2582: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2583: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_2584: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2585: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2586: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2587: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_2588: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2589: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2590: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2591: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2592: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2593: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2594: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2595: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2596: MOV [RSP+-0x240], R11 # mul_add_slow:266 MOV [RSP+-0x240], `mul_low_0 +Xx_2597: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2598: MOV [RSP+-0x238], R12 # mul_add_slow:266 MOV [RSP+-0x238], `mul_low_1 +Xx_2599: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2600: MOV [RSP+-0x230], R13 # mul_add_slow:266 MOV [RSP+-0x230], `mul_low_2 +Xx_2601: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2602: MOV [RSP+-0x228], RAX # mul_add_slow:266 MOV [RSP+-0x228], `mul_low_3 +Xx_2603: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2604: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_2605: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2606: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2607: MOV RAX, 
RDI # mul_add_slow:207 MOV RAX, `b +Xx_2608: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_2609: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2610: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2611: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2612: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_2613: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2614: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2615: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2616: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_2617: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2618: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2619: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2620: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2621: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2622: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2623: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2624: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2625: MOV [RSP+-0x220], R11 # mul_add_slow:266 MOV [RSP+-0x220], `mul_low_0 +Xx_2626: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2627: MOV [RSP+-0x218], R12 # mul_add_slow:266 MOV [RSP+-0x218], `mul_low_1 +Xx_2628: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2629: MOV [RSP+-0x210], R13 # mul_add_slow:266 MOV [RSP+-0x210], `mul_low_2 +Xx_2630: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2631: MOV [RSP+-0x208], RAX # mul_add_slow:266 MOV [RSP+-0x208], `mul_low_3 +Xx_2632: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2633: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_2634: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2635: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2636: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2637: MUL QWORD PTR [RBX+0x88] # 
mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_2638: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2639: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2640: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2641: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_2642: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2643: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2644: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2645: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_2646: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2647: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2648: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2649: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2650: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2651: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_2652: MOV [RSP+-0x200], R11 # mul_add_slow:266 MOV [RSP+-0x200], `mul_low_0 +Xx_2653: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_2654: MOV [RSP+-0x1f8], R12 # mul_add_slow:266 MOV [RSP+-0x1f8], `mul_low_1 +Xx_2655: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_2656: MOV [RSP+-0x1f0], R13 # mul_add_slow:266 MOV [RSP+-0x1f0], `mul_low_2 +Xx_2657: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_2658: MOV [RSP+-0x1e8], RAX # mul_add_slow:266 MOV [RSP+-0x1e8], `mul_low_3 +Xx_2659: MOV RDI, [RSP+-0x348] # gcd_unsigned:612 MOV `tmp, `spill_mod_u_0 +Xx_2660: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2661: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_2662: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2663: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2664: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2665: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_2666: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2667: MOV R15, RDX # mul_add_slow:215 
MOV `mul_high_1, RDX +Xx_2668: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2669: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_2670: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2671: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2672: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2673: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_2674: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_2675: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2676: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2677: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2678: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2679: STC # mul_add_slow:243 STC +Xx_2680: ADC R11, [RSP+-0x320] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x320] +Xx_2681: ADC R12, [RSP+-0x318] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x318] +Xx_2682: ADC R13, [RSP+-0x310] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x310] +Xx_2683: ADC RAX, [RSP+-0x308] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x308] +Xx_2684: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2685: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_2686: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_2687: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_2688: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_2689: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2690: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_2691: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2692: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2693: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2694: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_2695: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2696: MOV R15, RDX # mul_add_slow:215 MOV 
`mul_high_1, RDX +Xx_2697: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2698: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_2699: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2700: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2701: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2702: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_2703: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2704: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2705: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2706: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2707: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2708: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2709: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2710: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2711: ADC R11, [RSP+-0x300] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x300] +Xx_2712: ADC R12, [RSP+-0x2f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2f8] +Xx_2713: ADC R13, [RSP+-0x2f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2f0] +Xx_2714: ADC RAX, [RSP+-0x2e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2e8] +Xx_2715: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2716: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_2717: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_2718: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_2719: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_2720: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2721: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_2722: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2723: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2724: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2725: MUL QWORD PTR [RBX+0x48] # 
mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_2726: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2727: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2728: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2729: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_2730: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2731: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2732: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2733: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_2734: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2735: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2736: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2737: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2738: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2739: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2740: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2741: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2742: ADC R11, [RSP+-0x2e0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2e0] +Xx_2743: ADC R12, [RSP+-0x2d8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2d8] +Xx_2744: ADC R13, [RSP+-0x2d0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2d0] +Xx_2745: ADC RAX, [RSP+-0x2c8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2c8] +Xx_2746: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2747: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_2748: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_2749: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_2750: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_2751: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2752: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_2753: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX 
+Xx_2754: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2755: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2756: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_2757: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2758: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2759: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2760: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_2761: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2762: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2763: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2764: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_2765: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2766: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2767: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2768: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2769: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2770: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2771: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2772: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2773: ADC R11, [RSP+-0x2c0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2c0] +Xx_2774: ADC R12, [RSP+-0x2b8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2b8] +Xx_2775: ADC R13, [RSP+-0x2b0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2b0] +Xx_2776: ADC RAX, [RSP+-0x2a8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2a8] +Xx_2777: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2778: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_2779: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_2780: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_2781: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_2782: MOV RAX, RDI # mul_add_slow:207 MOV 
RAX, `b +Xx_2783: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_2784: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2785: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2786: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2787: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_2788: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2789: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2790: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2791: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_2792: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2793: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2794: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2795: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_2796: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2797: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2798: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2799: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2800: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2801: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2802: ADC R11, [RSP+-0x2a0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2a0] +Xx_2803: ADC R12, [RSP+-0x298] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x298] +Xx_2804: ADC R13, [RSP+-0x290] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x290] +Xx_2805: ADC RAX, [RSP+-0x288] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x288] +Xx_2806: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_2807: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_2808: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_2809: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_2810: MOV RSI, [RSP+-0x328] # gcd_unsigned:616 MOV `addr_new, `spill_addr_b_new +Xx_2811: MOV RDI, 
[RSP+-0x330] # gcd_unsigned:617 MOV `tmp, `spill_mod_v_1 +Xx_2812: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2813: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_2814: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2815: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2816: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2817: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_2818: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2819: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2820: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2821: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_2822: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2823: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2824: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2825: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_2826: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_2827: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2828: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2829: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2830: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2831: STC # mul_add_slow:243 STC +Xx_2832: ADC R11, [RSP+-0x280] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x280] +Xx_2833: ADC R12, [RSP+-0x278] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x278] +Xx_2834: ADC R13, [RSP+-0x270] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x270] +Xx_2835: ADC RAX, [RSP+-0x268] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x268] +Xx_2836: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2837: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_2838: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_2839: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_2840: MOV [RSI+0x18], RAX # 
mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_2841: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2842: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_2843: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2844: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2845: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2846: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_2847: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2848: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2849: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2850: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_2851: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2852: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2853: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2854: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_2855: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2856: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2857: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2858: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2859: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2860: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2861: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2862: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2863: ADC R11, [RSP+-0x260] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x260] +Xx_2864: ADC R12, [RSP+-0x258] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x258] +Xx_2865: ADC R13, [RSP+-0x250] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x250] +Xx_2866: ADC RAX, [RSP+-0x248] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x248] +Xx_2867: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2868: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_2869: MOV [RSI+0x28], R12 # 
mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_2870: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_2871: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_2872: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2873: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_2874: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2875: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2876: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2877: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_2878: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2879: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2880: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2881: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_2882: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2883: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2884: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2885: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_2886: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2887: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2888: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2889: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2890: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2891: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2892: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2893: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2894: ADC R11, [RSP+-0x240] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x240] +Xx_2895: ADC R12, [RSP+-0x238] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x238] +Xx_2896: ADC R13, [RSP+-0x230] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x230] +Xx_2897: ADC RAX, [RSP+-0x228] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x228] +Xx_2898: SBB R9, R9 # 
mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2899: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_2900: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_2901: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_2902: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_2903: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2904: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_2905: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2906: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2907: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2908: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_2909: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2910: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2911: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2912: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_2913: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2914: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2915: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2916: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_2917: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2918: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2919: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2920: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2921: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2922: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_2923: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_2924: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2925: ADC R11, [RSP+-0x220] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x220] +Xx_2926: ADC R12, [RSP+-0x218] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x218] +Xx_2927: ADC R13, [RSP+-0x210] # 
mul_add_slow:254 ADC `mul_low_2, [RSP+-0x210] +Xx_2928: ADC RAX, [RSP+-0x208] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x208] +Xx_2929: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_2930: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_2931: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_2932: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_2933: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_2934: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2935: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_2936: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_2937: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_2938: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2939: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_2940: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_2941: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_2942: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2943: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_2944: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_2945: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_2946: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2947: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_2948: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_2949: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_2950: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_2951: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_2952: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_2953: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_2954: ADC R11, [RSP+-0x200] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x200] +Xx_2955: ADC R12, [RSP+-0x1f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x1f8] +Xx_2956: ADC R13, [RSP+-0x1f0] 
# mul_add_slow:254 ADC `mul_low_2, [RSP+-0x1f0] +Xx_2957: ADC RAX, [RSP+-0x1e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x1e8] +Xx_2958: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_2959: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_2960: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_2961: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_2962: JMP _label_45 # gcd_unsigned:620 JMP _label_45 +Xx_2963: multiply_uv_size_8: # gcd_unsigned:490 multiply_uv_size_8: +Xx_2964: MOV RDI, [RSP+-0x388] # gcd_unsigned:531 MOV `tmp, `spill_parity +Xx_2965: CMP RDI, 0 # gcd_unsigned:532 CMP `tmp, 0 +Xx_2966: MOV RBX, [RSP+-0x3a8] # gcd_unsigned:535 MOV `addr_a, `spill_u_0 +Xx_2967: MOV RBP, [RSP+-0x398] # gcd_unsigned:536 MOV `addr_b, `spill_v_0 +Xx_2968: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_2969: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_2970: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_2971: MOV [RSP+-0x348], RBX # gcd_unsigned:543 MOV `spill_mod_u_0, `addr_a +Xx_2972: MOV [RSP+-0x338], RBP # gcd_unsigned:544 MOV `spill_mod_v_0, `addr_b +Xx_2973: MOV RBX, [RSP+-0x3a0] # gcd_unsigned:535 MOV `addr_a, `spill_u_1 +Xx_2974: MOV RBP, [RSP+-0x390] # gcd_unsigned:536 MOV `addr_b, `spill_v_1 +Xx_2975: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_2976: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_2977: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_2978: MOV [RSP+-0x340], RBX # gcd_unsigned:543 MOV `spill_mod_u_1, `addr_a +Xx_2979: MOV [RSP+-0x330], RBP # gcd_unsigned:544 MOV `spill_mod_v_1, `addr_b +Xx_2980: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:547 MOV `addr_new, `spill_iter +Xx_2981: TEST RSI, 1 # gcd_unsigned:548 TEST `addr_new, 1 +Xx_2982: MOV RBX, [RSP+-0x3f8] # gcd_unsigned:551 MOV `addr_a, `spill_a_addr_base +Xx_2983: CMOVNZ RBX, [RSP+-0x3e8] # gcd_unsigned:552 CMOVNZ 
`addr_a, `spill_a_2_addr_base +Xx_2984: MOV RBP, [RSP+-0x3f0] # gcd_unsigned:555 MOV `addr_b, `spill_b_addr_base +Xx_2985: CMOVNZ RBP, [RSP+-0x3e0] # gcd_unsigned:556 CMOVNZ `addr_b, `spill_b_2_addr_base +Xx_2986: CMP RDI, 0 # gcd_unsigned:559 CMP `tmp, 0 +Xx_2987: MOV RSI, RBX # gcd_unsigned:560 MOV `addr_new, `addr_a +Xx_2988: CMOVNE RBX, RBP # gcd_unsigned:561 CMOVNE `addr_a, `addr_b +Xx_2989: CMOVNE RBP, RSI # gcd_unsigned:562 CMOVNE `addr_b, `addr_new +Xx_2990: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:567 MOV `addr_new, `spill_iter +Xx_2991: TEST RSI, 1 # gcd_unsigned:568 TEST `addr_new, 1 +Xx_2992: MOV RSI, [RSP+-0x3e0] # gcd_unsigned:569 MOV `addr_new, `spill_b_2_addr_base +Xx_2993: CMOVNZ RSI, [RSP+-0x3f0] # gcd_unsigned:570 CMOVNZ `addr_new, `spill_b_addr_base +Xx_2994: MOV [RSP+-0x328], RSI # gcd_unsigned:571 MOV `spill_addr_b_new, `addr_new +Xx_2995: MOV RSI, [RSP+-0x3e8] # gcd_unsigned:574 MOV `addr_new, `spill_a_2_addr_base +Xx_2996: CMOVNZ RSI, [RSP+-0x3f8] # gcd_unsigned:575 CMOVNZ `addr_new, `spill_a_addr_base +Xx_2997: MOV RDI, [RSP+-0x338] # gcd_unsigned:604 MOV `tmp, `spill_mod_v_0 +Xx_2998: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_2999: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_3000: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3001: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3002: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3003: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_3004: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3005: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3006: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3007: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_3008: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3009: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3010: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3011: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL 
QWORD PTR [RBP+0x18] +Xx_3012: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3013: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3014: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3015: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3016: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3017: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3018: MOV [RSP+-0x320], R11 # mul_add_slow:266 MOV [RSP+-0x320], `mul_low_0 +Xx_3019: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3020: MOV [RSP+-0x318], R12 # mul_add_slow:266 MOV [RSP+-0x318], `mul_low_1 +Xx_3021: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3022: MOV [RSP+-0x310], R13 # mul_add_slow:266 MOV [RSP+-0x310], `mul_low_2 +Xx_3023: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3024: MOV [RSP+-0x308], RAX # mul_add_slow:266 MOV [RSP+-0x308], `mul_low_3 +Xx_3025: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3026: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_3027: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3028: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3029: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3030: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_3031: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3032: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3033: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3034: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_3035: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3036: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3037: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3038: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_3039: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3040: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3041: ADC R12, R14 # mul_add_slow:226 ADC 
`mul_low_1, `mul_high_0 +Xx_3042: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3043: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3044: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3045: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3046: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3047: MOV [RSP+-0x300], R11 # mul_add_slow:266 MOV [RSP+-0x300], `mul_low_0 +Xx_3048: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3049: MOV [RSP+-0x2f8], R12 # mul_add_slow:266 MOV [RSP+-0x2f8], `mul_low_1 +Xx_3050: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3051: MOV [RSP+-0x2f0], R13 # mul_add_slow:266 MOV [RSP+-0x2f0], `mul_low_2 +Xx_3052: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3053: MOV [RSP+-0x2e8], RAX # mul_add_slow:266 MOV [RSP+-0x2e8], `mul_low_3 +Xx_3054: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3055: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_3056: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3057: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3058: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3059: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_3060: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3061: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3062: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3063: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_3064: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3065: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3066: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3067: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_3068: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3069: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3070: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3071: ADC R13, R15 # mul_add_slow:229 ADC 
`mul_low_2, `mul_high_1 +Xx_3072: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3073: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3074: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3075: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3076: MOV [RSP+-0x2e0], R11 # mul_add_slow:266 MOV [RSP+-0x2e0], `mul_low_0 +Xx_3077: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3078: MOV [RSP+-0x2d8], R12 # mul_add_slow:266 MOV [RSP+-0x2d8], `mul_low_1 +Xx_3079: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3080: MOV [RSP+-0x2d0], R13 # mul_add_slow:266 MOV [RSP+-0x2d0], `mul_low_2 +Xx_3081: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3082: MOV [RSP+-0x2c8], RAX # mul_add_slow:266 MOV [RSP+-0x2c8], `mul_low_3 +Xx_3083: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3084: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_3085: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3086: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3087: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3088: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_3089: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3090: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3091: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3092: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_3093: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3094: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3095: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3096: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_3097: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3098: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3099: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3100: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3101: ADC RAX, RCX # mul_add_slow:230 ADC 
`mul_low_3, `mul_high_2 +Xx_3102: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3103: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3104: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3105: MOV [RSP+-0x2c0], R11 # mul_add_slow:266 MOV [RSP+-0x2c0], `mul_low_0 +Xx_3106: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3107: MOV [RSP+-0x2b8], R12 # mul_add_slow:266 MOV [RSP+-0x2b8], `mul_low_1 +Xx_3108: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3109: MOV [RSP+-0x2b0], R13 # mul_add_slow:266 MOV [RSP+-0x2b0], `mul_low_2 +Xx_3110: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3111: MOV [RSP+-0x2a8], RAX # mul_add_slow:266 MOV [RSP+-0x2a8], `mul_low_3 +Xx_3112: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3113: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_3114: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3115: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3116: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3117: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_3118: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3119: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3120: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3121: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_3122: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3123: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3124: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3125: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_3126: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3127: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3128: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3129: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3130: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3131: NOT R11 # mul_add_slow:264 NOT 
`mul_low_0 +Xx_3132: MOV [RSP+-0x2a0], R11 # mul_add_slow:266 MOV [RSP+-0x2a0], `mul_low_0 +Xx_3133: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3134: MOV [RSP+-0x298], R12 # mul_add_slow:266 MOV [RSP+-0x298], `mul_low_1 +Xx_3135: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3136: MOV [RSP+-0x290], R13 # mul_add_slow:266 MOV [RSP+-0x290], `mul_low_2 +Xx_3137: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3138: MOV [RSP+-0x288], RAX # mul_add_slow:266 MOV [RSP+-0x288], `mul_low_3 +Xx_3139: MOV RDI, [RSP+-0x340] # gcd_unsigned:608 MOV `tmp, `spill_mod_u_1 +Xx_3140: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3141: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_3142: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3143: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3144: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3145: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_3146: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3147: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3148: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3149: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_3150: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3151: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3152: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3153: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_3154: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3155: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3156: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3157: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3158: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3159: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3160: MOV [RSP+-0x280], R11 # mul_add_slow:266 MOV [RSP+-0x280], `mul_low_0 +Xx_3161: NOT R12 # mul_add_slow:264 NOT 
`mul_low_1 +Xx_3162: MOV [RSP+-0x278], R12 # mul_add_slow:266 MOV [RSP+-0x278], `mul_low_1 +Xx_3163: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3164: MOV [RSP+-0x270], R13 # mul_add_slow:266 MOV [RSP+-0x270], `mul_low_2 +Xx_3165: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3166: MOV [RSP+-0x268], RAX # mul_add_slow:266 MOV [RSP+-0x268], `mul_low_3 +Xx_3167: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3168: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_3169: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3170: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3171: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3172: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_3173: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3174: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3175: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3176: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_3177: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3178: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3179: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3180: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_3181: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3182: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3183: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3184: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3185: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3186: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3187: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3188: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3189: MOV [RSP+-0x260], R11 # mul_add_slow:266 MOV [RSP+-0x260], `mul_low_0 +Xx_3190: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3191: MOV [RSP+-0x258], R12 # mul_add_slow:266 MOV 
[RSP+-0x258], `mul_low_1 +Xx_3192: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3193: MOV [RSP+-0x250], R13 # mul_add_slow:266 MOV [RSP+-0x250], `mul_low_2 +Xx_3194: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3195: MOV [RSP+-0x248], RAX # mul_add_slow:266 MOV [RSP+-0x248], `mul_low_3 +Xx_3196: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3197: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_3198: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3199: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3200: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3201: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_3202: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3203: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3204: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3205: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_3206: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3207: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3208: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3209: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_3210: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3211: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3212: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3213: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3214: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3215: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3216: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3217: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3218: MOV [RSP+-0x240], R11 # mul_add_slow:266 MOV [RSP+-0x240], `mul_low_0 +Xx_3219: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3220: MOV [RSP+-0x238], R12 # mul_add_slow:266 MOV [RSP+-0x238], `mul_low_1 +Xx_3221: NOT R13 # mul_add_slow:264 NOT 
`mul_low_2 +Xx_3222: MOV [RSP+-0x230], R13 # mul_add_slow:266 MOV [RSP+-0x230], `mul_low_2 +Xx_3223: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3224: MOV [RSP+-0x228], RAX # mul_add_slow:266 MOV [RSP+-0x228], `mul_low_3 +Xx_3225: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3226: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_3227: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3228: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3229: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3230: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_3231: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3232: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3233: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3234: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_3235: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3236: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3237: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3238: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_3239: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3240: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3241: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3242: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3243: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3244: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3245: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3246: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3247: MOV [RSP+-0x220], R11 # mul_add_slow:266 MOV [RSP+-0x220], `mul_low_0 +Xx_3248: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3249: MOV [RSP+-0x218], R12 # mul_add_slow:266 MOV [RSP+-0x218], `mul_low_1 +Xx_3250: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3251: MOV [RSP+-0x210], R13 # mul_add_slow:266 MOV 
[RSP+-0x210], `mul_low_2 +Xx_3252: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3253: MOV [RSP+-0x208], RAX # mul_add_slow:266 MOV [RSP+-0x208], `mul_low_3 +Xx_3254: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3255: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_3256: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3257: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3258: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3259: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_3260: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3261: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3262: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3263: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_3264: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3265: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3266: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3267: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_3268: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3269: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3270: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3271: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3272: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3273: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3274: MOV [RSP+-0x200], R11 # mul_add_slow:266 MOV [RSP+-0x200], `mul_low_0 +Xx_3275: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3276: MOV [RSP+-0x1f8], R12 # mul_add_slow:266 MOV [RSP+-0x1f8], `mul_low_1 +Xx_3277: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3278: MOV [RSP+-0x1f0], R13 # mul_add_slow:266 MOV [RSP+-0x1f0], `mul_low_2 +Xx_3279: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3280: MOV [RSP+-0x1e8], RAX # mul_add_slow:266 MOV [RSP+-0x1e8], `mul_low_3 +Xx_3281: MOV RDI, [RSP+-0x348] # gcd_unsigned:612 MOV `tmp, 
`spill_mod_u_0 +Xx_3282: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3283: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_3284: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3285: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3286: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3287: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_3288: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3289: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3290: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3291: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_3292: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3293: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3294: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3295: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_3296: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3297: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3298: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3299: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3300: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3301: STC # mul_add_slow:243 STC +Xx_3302: ADC R11, [RSP+-0x320] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x320] +Xx_3303: ADC R12, [RSP+-0x318] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x318] +Xx_3304: ADC R13, [RSP+-0x310] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x310] +Xx_3305: ADC RAX, [RSP+-0x308] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x308] +Xx_3306: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3307: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_3308: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_3309: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_3310: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 
+Xx_3311: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3312: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_3313: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3314: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3315: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3316: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_3317: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3318: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3319: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3320: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_3321: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3322: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3323: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3324: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_3325: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3326: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3327: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3328: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3329: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3330: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3331: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3332: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3333: ADC R11, [RSP+-0x300] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x300] +Xx_3334: ADC R12, [RSP+-0x2f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2f8] +Xx_3335: ADC R13, [RSP+-0x2f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2f0] +Xx_3336: ADC RAX, [RSP+-0x2e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2e8] +Xx_3337: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3338: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_3339: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 
+Xx_3340: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_3341: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_3342: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3343: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_3344: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3345: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3346: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3347: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_3348: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3349: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3350: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3351: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_3352: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3353: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3354: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3355: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_3356: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3357: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3358: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3359: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3360: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3361: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3362: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3363: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3364: ADC R11, [RSP+-0x2e0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2e0] +Xx_3365: ADC R12, [RSP+-0x2d8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2d8] +Xx_3366: ADC R13, [RSP+-0x2d0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2d0] +Xx_3367: ADC RAX, [RSP+-0x2c8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2c8] +Xx_3368: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry 
+Xx_3369: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_3370: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_3371: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_3372: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_3373: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3374: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_3375: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3376: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3377: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3378: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_3379: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3380: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3381: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3382: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_3383: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3384: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3385: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3386: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_3387: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3388: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3389: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3390: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3391: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3392: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3393: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3394: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3395: ADC R11, [RSP+-0x2c0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2c0] +Xx_3396: ADC R12, [RSP+-0x2b8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2b8] +Xx_3397: ADC R13, [RSP+-0x2b0] # mul_add_slow:254 ADC `mul_low_2, 
[RSP+-0x2b0] +Xx_3398: ADC RAX, [RSP+-0x2a8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2a8] +Xx_3399: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3400: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_3401: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_3402: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_3403: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_3404: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3405: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_3406: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3407: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3408: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3409: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_3410: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3411: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3412: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3413: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_3414: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3415: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3416: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3417: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_3418: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3419: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3420: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3421: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3422: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3423: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3424: ADC R11, [RSP+-0x2a0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2a0] +Xx_3425: ADC R12, [RSP+-0x298] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x298] +Xx_3426: ADC R13, [RSP+-0x290] # mul_add_slow:254 ADC 
`mul_low_2, [RSP+-0x290] +Xx_3427: ADC RAX, [RSP+-0x288] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x288] +Xx_3428: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_3429: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_3430: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_3431: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_3432: MOV RSI, [RSP+-0x328] # gcd_unsigned:616 MOV `addr_new, `spill_addr_b_new +Xx_3433: MOV RDI, [RSP+-0x330] # gcd_unsigned:617 MOV `tmp, `spill_mod_v_1 +Xx_3434: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3435: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_3436: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3437: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3438: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3439: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_3440: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3441: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3442: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3443: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_3444: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3445: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3446: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3447: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_3448: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3449: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3450: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3451: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3452: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3453: STC # mul_add_slow:243 STC +Xx_3454: ADC R11, [RSP+-0x280] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x280] +Xx_3455: ADC R12, [RSP+-0x278] # mul_add_slow:254 
ADC `mul_low_1, [RSP+-0x278] +Xx_3456: ADC R13, [RSP+-0x270] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x270] +Xx_3457: ADC RAX, [RSP+-0x268] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x268] +Xx_3458: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3459: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_3460: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_3461: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_3462: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_3463: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3464: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_3465: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3466: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3467: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3468: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_3469: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3470: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3471: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3472: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_3473: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3474: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3475: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3476: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_3477: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3478: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3479: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3480: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3481: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3482: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3483: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3484: ADD R9, 1 # mul_add_slow:249 ADD 
`add_carry, 1 +Xx_3485: ADC R11, [RSP+-0x260] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x260] +Xx_3486: ADC R12, [RSP+-0x258] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x258] +Xx_3487: ADC R13, [RSP+-0x250] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x250] +Xx_3488: ADC RAX, [RSP+-0x248] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x248] +Xx_3489: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3490: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_3491: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_3492: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_3493: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_3494: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3495: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_3496: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3497: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3498: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3499: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_3500: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3501: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3502: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3503: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_3504: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3505: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3506: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3507: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_3508: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3509: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3510: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3511: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3512: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3513: MOV R10, RDX # mul_add_slow:233 
MOV `mul_high_4_previous, `mul_high_3 +Xx_3514: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3515: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3516: ADC R11, [RSP+-0x240] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x240] +Xx_3517: ADC R12, [RSP+-0x238] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x238] +Xx_3518: ADC R13, [RSP+-0x230] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x230] +Xx_3519: ADC RAX, [RSP+-0x228] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x228] +Xx_3520: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3521: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_3522: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_3523: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_3524: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_3525: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3526: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_3527: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3528: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3529: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3530: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_3531: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3532: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3533: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3534: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_3535: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3536: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3537: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3538: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_3539: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3540: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3541: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3542: ADC R13, R15 # 
mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3543: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3544: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3545: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3546: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3547: ADC R11, [RSP+-0x220] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x220] +Xx_3548: ADC R12, [RSP+-0x218] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x218] +Xx_3549: ADC R13, [RSP+-0x210] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x210] +Xx_3550: ADC RAX, [RSP+-0x208] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x208] +Xx_3551: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3552: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_3553: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_3554: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_3555: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_3556: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3557: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_3558: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3559: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3560: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3561: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_3562: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3563: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3564: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3565: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_3566: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3567: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3568: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3569: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_3570: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3571: ADC R11, R10 # 
mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3572: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3573: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3574: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3575: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3576: ADC R11, [RSP+-0x200] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x200] +Xx_3577: ADC R12, [RSP+-0x1f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x1f8] +Xx_3578: ADC R13, [RSP+-0x1f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x1f0] +Xx_3579: ADC RAX, [RSP+-0x1e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x1e8] +Xx_3580: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_3581: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_3582: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_3583: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_3584: JMP _label_45 # gcd_unsigned:620 JMP _label_45 +Xx_3585: multiply_uv_size_12: # gcd_unsigned:490 multiply_uv_size_12: +Xx_3586: MOV RDI, [RSP+-0x388] # gcd_unsigned:531 MOV `tmp, `spill_parity +Xx_3587: CMP RDI, 0 # gcd_unsigned:532 CMP `tmp, 0 +Xx_3588: MOV RBX, [RSP+-0x3a8] # gcd_unsigned:535 MOV `addr_a, `spill_u_0 +Xx_3589: MOV RBP, [RSP+-0x398] # gcd_unsigned:536 MOV `addr_b, `spill_v_0 +Xx_3590: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_3591: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_3592: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_3593: MOV [RSP+-0x348], RBX # gcd_unsigned:543 MOV `spill_mod_u_0, `addr_a +Xx_3594: MOV [RSP+-0x338], RBP # gcd_unsigned:544 MOV `spill_mod_v_0, `addr_b +Xx_3595: MOV RBX, [RSP+-0x3a0] # gcd_unsigned:535 MOV `addr_a, `spill_u_1 +Xx_3596: MOV RBP, [RSP+-0x390] # gcd_unsigned:536 MOV `addr_b, `spill_v_1 +Xx_3597: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_3598: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b 
+Xx_3599: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_3600: MOV [RSP+-0x340], RBX # gcd_unsigned:543 MOV `spill_mod_u_1, `addr_a +Xx_3601: MOV [RSP+-0x330], RBP # gcd_unsigned:544 MOV `spill_mod_v_1, `addr_b +Xx_3602: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:547 MOV `addr_new, `spill_iter +Xx_3603: TEST RSI, 1 # gcd_unsigned:548 TEST `addr_new, 1 +Xx_3604: MOV RBX, [RSP+-0x3f8] # gcd_unsigned:551 MOV `addr_a, `spill_a_addr_base +Xx_3605: CMOVNZ RBX, [RSP+-0x3e8] # gcd_unsigned:552 CMOVNZ `addr_a, `spill_a_2_addr_base +Xx_3606: MOV RBP, [RSP+-0x3f0] # gcd_unsigned:555 MOV `addr_b, `spill_b_addr_base +Xx_3607: CMOVNZ RBP, [RSP+-0x3e0] # gcd_unsigned:556 CMOVNZ `addr_b, `spill_b_2_addr_base +Xx_3608: CMP RDI, 0 # gcd_unsigned:559 CMP `tmp, 0 +Xx_3609: MOV RSI, RBX # gcd_unsigned:560 MOV `addr_new, `addr_a +Xx_3610: CMOVNE RBX, RBP # gcd_unsigned:561 CMOVNE `addr_a, `addr_b +Xx_3611: CMOVNE RBP, RSI # gcd_unsigned:562 CMOVNE `addr_b, `addr_new +Xx_3612: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:567 MOV `addr_new, `spill_iter +Xx_3613: TEST RSI, 1 # gcd_unsigned:568 TEST `addr_new, 1 +Xx_3614: MOV RSI, [RSP+-0x3e0] # gcd_unsigned:569 MOV `addr_new, `spill_b_2_addr_base +Xx_3615: CMOVNZ RSI, [RSP+-0x3f0] # gcd_unsigned:570 CMOVNZ `addr_new, `spill_b_addr_base +Xx_3616: MOV [RSP+-0x328], RSI # gcd_unsigned:571 MOV `spill_addr_b_new, `addr_new +Xx_3617: MOV RSI, [RSP+-0x3e8] # gcd_unsigned:574 MOV `addr_new, `spill_a_2_addr_base +Xx_3618: CMOVNZ RSI, [RSP+-0x3f8] # gcd_unsigned:575 CMOVNZ `addr_new, `spill_a_addr_base +Xx_3619: MOV RDI, [RSP+-0x338] # gcd_unsigned:604 MOV `tmp, `spill_mod_v_0 +Xx_3620: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3621: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_3622: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3623: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3624: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3625: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD 
PTR [RBP+0x8] +Xx_3626: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3627: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3628: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3629: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_3630: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3631: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3632: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3633: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_3634: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3635: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3636: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3637: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3638: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3639: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3640: MOV [RSP+-0x320], R11 # mul_add_slow:266 MOV [RSP+-0x320], `mul_low_0 +Xx_3641: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3642: MOV [RSP+-0x318], R12 # mul_add_slow:266 MOV [RSP+-0x318], `mul_low_1 +Xx_3643: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3644: MOV [RSP+-0x310], R13 # mul_add_slow:266 MOV [RSP+-0x310], `mul_low_2 +Xx_3645: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3646: MOV [RSP+-0x308], RAX # mul_add_slow:266 MOV [RSP+-0x308], `mul_low_3 +Xx_3647: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3648: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_3649: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3650: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3651: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3652: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_3653: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3654: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3655: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3656: MUL 
QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_3657: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3658: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3659: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3660: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_3661: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3662: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3663: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3664: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3665: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3666: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3667: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3668: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3669: MOV [RSP+-0x300], R11 # mul_add_slow:266 MOV [RSP+-0x300], `mul_low_0 +Xx_3670: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3671: MOV [RSP+-0x2f8], R12 # mul_add_slow:266 MOV [RSP+-0x2f8], `mul_low_1 +Xx_3672: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3673: MOV [RSP+-0x2f0], R13 # mul_add_slow:266 MOV [RSP+-0x2f0], `mul_low_2 +Xx_3674: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3675: MOV [RSP+-0x2e8], RAX # mul_add_slow:266 MOV [RSP+-0x2e8], `mul_low_3 +Xx_3676: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3677: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_3678: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3679: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3680: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3681: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_3682: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3683: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3684: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3685: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] 
+Xx_3686: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3687: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3688: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3689: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_3690: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3691: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3692: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3693: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3694: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3695: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3696: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3697: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3698: MOV [RSP+-0x2e0], R11 # mul_add_slow:266 MOV [RSP+-0x2e0], `mul_low_0 +Xx_3699: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3700: MOV [RSP+-0x2d8], R12 # mul_add_slow:266 MOV [RSP+-0x2d8], `mul_low_1 +Xx_3701: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3702: MOV [RSP+-0x2d0], R13 # mul_add_slow:266 MOV [RSP+-0x2d0], `mul_low_2 +Xx_3703: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3704: MOV [RSP+-0x2c8], RAX # mul_add_slow:266 MOV [RSP+-0x2c8], `mul_low_3 +Xx_3705: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3706: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_3707: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3708: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3709: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3710: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_3711: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3712: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3713: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3714: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_3715: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3716: 
MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3717: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3718: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_3719: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3720: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3721: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3722: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3723: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3724: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3725: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3726: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3727: MOV [RSP+-0x2c0], R11 # mul_add_slow:266 MOV [RSP+-0x2c0], `mul_low_0 +Xx_3728: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3729: MOV [RSP+-0x2b8], R12 # mul_add_slow:266 MOV [RSP+-0x2b8], `mul_low_1 +Xx_3730: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3731: MOV [RSP+-0x2b0], R13 # mul_add_slow:266 MOV [RSP+-0x2b0], `mul_low_2 +Xx_3732: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3733: MOV [RSP+-0x2a8], RAX # mul_add_slow:266 MOV [RSP+-0x2a8], `mul_low_3 +Xx_3734: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3735: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_3736: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3737: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3738: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3739: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_3740: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3741: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3742: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3743: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_3744: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3745: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3746: MOV RAX, 
RDI # mul_add_slow:207 MOV RAX, `b +Xx_3747: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_3748: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3749: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3750: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3751: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3752: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3753: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3754: MOV [RSP+-0x2a0], R11 # mul_add_slow:266 MOV [RSP+-0x2a0], `mul_low_0 +Xx_3755: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3756: MOV [RSP+-0x298], R12 # mul_add_slow:266 MOV [RSP+-0x298], `mul_low_1 +Xx_3757: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3758: MOV [RSP+-0x290], R13 # mul_add_slow:266 MOV [RSP+-0x290], `mul_low_2 +Xx_3759: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3760: MOV [RSP+-0x288], RAX # mul_add_slow:266 MOV [RSP+-0x288], `mul_low_3 +Xx_3761: MOV RDI, [RSP+-0x340] # gcd_unsigned:608 MOV `tmp, `spill_mod_u_1 +Xx_3762: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3763: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_3764: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3765: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3766: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3767: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_3768: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3769: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3770: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3771: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_3772: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3773: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3774: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3775: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_3776: ADD R12, R14 # 
mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3777: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3778: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3779: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3780: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3781: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3782: MOV [RSP+-0x280], R11 # mul_add_slow:266 MOV [RSP+-0x280], `mul_low_0 +Xx_3783: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3784: MOV [RSP+-0x278], R12 # mul_add_slow:266 MOV [RSP+-0x278], `mul_low_1 +Xx_3785: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3786: MOV [RSP+-0x270], R13 # mul_add_slow:266 MOV [RSP+-0x270], `mul_low_2 +Xx_3787: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3788: MOV [RSP+-0x268], RAX # mul_add_slow:266 MOV [RSP+-0x268], `mul_low_3 +Xx_3789: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3790: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_3791: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3792: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3793: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3794: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_3795: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3796: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3797: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3798: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_3799: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3800: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3801: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3802: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_3803: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3804: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3805: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3806: ADC R13, R15 # 
mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3807: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3808: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3809: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3810: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3811: MOV [RSP+-0x260], R11 # mul_add_slow:266 MOV [RSP+-0x260], `mul_low_0 +Xx_3812: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3813: MOV [RSP+-0x258], R12 # mul_add_slow:266 MOV [RSP+-0x258], `mul_low_1 +Xx_3814: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3815: MOV [RSP+-0x250], R13 # mul_add_slow:266 MOV [RSP+-0x250], `mul_low_2 +Xx_3816: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3817: MOV [RSP+-0x248], RAX # mul_add_slow:266 MOV [RSP+-0x248], `mul_low_3 +Xx_3818: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3819: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_3820: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3821: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3822: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3823: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_3824: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3825: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3826: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3827: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_3828: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3829: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3830: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3831: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_3832: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3833: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3834: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3835: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3836: ADC RAX, RCX # 
mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3837: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3838: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3839: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3840: MOV [RSP+-0x240], R11 # mul_add_slow:266 MOV [RSP+-0x240], `mul_low_0 +Xx_3841: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3842: MOV [RSP+-0x238], R12 # mul_add_slow:266 MOV [RSP+-0x238], `mul_low_1 +Xx_3843: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3844: MOV [RSP+-0x230], R13 # mul_add_slow:266 MOV [RSP+-0x230], `mul_low_2 +Xx_3845: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3846: MOV [RSP+-0x228], RAX # mul_add_slow:266 MOV [RSP+-0x228], `mul_low_3 +Xx_3847: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3848: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_3849: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3850: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3851: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3852: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_3853: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3854: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3855: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3856: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_3857: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3858: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3859: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3860: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_3861: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3862: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3863: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3864: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3865: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3866: MOV R10, RDX # 
mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3867: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3868: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3869: MOV [RSP+-0x220], R11 # mul_add_slow:266 MOV [RSP+-0x220], `mul_low_0 +Xx_3870: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3871: MOV [RSP+-0x218], R12 # mul_add_slow:266 MOV [RSP+-0x218], `mul_low_1 +Xx_3872: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3873: MOV [RSP+-0x210], R13 # mul_add_slow:266 MOV [RSP+-0x210], `mul_low_2 +Xx_3874: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3875: MOV [RSP+-0x208], RAX # mul_add_slow:266 MOV [RSP+-0x208], `mul_low_3 +Xx_3876: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3877: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_3878: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3879: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3880: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3881: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_3882: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3883: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3884: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3885: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_3886: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3887: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3888: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3889: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_3890: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3891: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3892: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3893: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3894: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3895: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_3896: MOV [RSP+-0x200], R11 # 
mul_add_slow:266 MOV [RSP+-0x200], `mul_low_0 +Xx_3897: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_3898: MOV [RSP+-0x1f8], R12 # mul_add_slow:266 MOV [RSP+-0x1f8], `mul_low_1 +Xx_3899: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_3900: MOV [RSP+-0x1f0], R13 # mul_add_slow:266 MOV [RSP+-0x1f0], `mul_low_2 +Xx_3901: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_3902: MOV [RSP+-0x1e8], RAX # mul_add_slow:266 MOV [RSP+-0x1e8], `mul_low_3 +Xx_3903: MOV RDI, [RSP+-0x348] # gcd_unsigned:612 MOV `tmp, `spill_mod_u_0 +Xx_3904: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3905: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_3906: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3907: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3908: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3909: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_3910: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3911: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3912: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3913: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_3914: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3915: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3916: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3917: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_3918: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_3919: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3920: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3921: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3922: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3923: STC # mul_add_slow:243 STC +Xx_3924: ADC R11, [RSP+-0x320] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x320] +Xx_3925: ADC R12, [RSP+-0x318] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x318] +Xx_3926: ADC R13, 
[RSP+-0x310] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x310] +Xx_3927: ADC RAX, [RSP+-0x308] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x308] +Xx_3928: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3929: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_3930: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_3931: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_3932: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_3933: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3934: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_3935: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3936: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3937: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3938: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_3939: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3940: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3941: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3942: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_3943: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3944: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3945: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3946: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_3947: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3948: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3949: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3950: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3951: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3952: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3953: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3954: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3955: ADC R11, [RSP+-0x300] # 
mul_add_slow:250 ADC `mul_low_0, [RSP+-0x300] +Xx_3956: ADC R12, [RSP+-0x2f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2f8] +Xx_3957: ADC R13, [RSP+-0x2f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2f0] +Xx_3958: ADC RAX, [RSP+-0x2e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2e8] +Xx_3959: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3960: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_3961: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_3962: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_3963: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_3964: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3965: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_3966: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3967: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3968: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3969: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_3970: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_3971: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_3972: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3973: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_3974: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_3975: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_3976: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3977: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_3978: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_3979: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_3980: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_3981: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_3982: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_3983: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_3984: 
SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_3985: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_3986: ADC R11, [RSP+-0x2e0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2e0] +Xx_3987: ADC R12, [RSP+-0x2d8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2d8] +Xx_3988: ADC R13, [RSP+-0x2d0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2d0] +Xx_3989: ADC RAX, [RSP+-0x2c8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2c8] +Xx_3990: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_3991: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_3992: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_3993: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_3994: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_3995: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_3996: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_3997: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_3998: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_3999: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4000: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_4001: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4002: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4003: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4004: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_4005: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4006: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4007: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4008: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_4009: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4010: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4011: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4012: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4013: 
ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4014: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4015: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4016: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4017: ADC R11, [RSP+-0x2c0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2c0] +Xx_4018: ADC R12, [RSP+-0x2b8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2b8] +Xx_4019: ADC R13, [RSP+-0x2b0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2b0] +Xx_4020: ADC RAX, [RSP+-0x2a8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2a8] +Xx_4021: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4022: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_4023: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_4024: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_4025: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_4026: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4027: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_4028: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4029: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4030: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4031: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_4032: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4033: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4034: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4035: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_4036: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4037: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4038: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4039: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_4040: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4041: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous 
+Xx_4042: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4043: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4044: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4045: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4046: ADC R11, [RSP+-0x2a0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2a0] +Xx_4047: ADC R12, [RSP+-0x298] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x298] +Xx_4048: ADC R13, [RSP+-0x290] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x290] +Xx_4049: ADC RAX, [RSP+-0x288] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x288] +Xx_4050: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_4051: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_4052: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_4053: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_4054: MOV RSI, [RSP+-0x328] # gcd_unsigned:616 MOV `addr_new, `spill_addr_b_new +Xx_4055: MOV RDI, [RSP+-0x330] # gcd_unsigned:617 MOV `tmp, `spill_mod_v_1 +Xx_4056: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4057: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_4058: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4059: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4060: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4061: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_4062: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4063: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4064: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4065: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_4066: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4067: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4068: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4069: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_4070: ADD R12, R14 # mul_add_slow:222 ADD 
`mul_low_1, `mul_high_0 +Xx_4071: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4072: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4073: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4074: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4075: STC # mul_add_slow:243 STC +Xx_4076: ADC R11, [RSP+-0x280] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x280] +Xx_4077: ADC R12, [RSP+-0x278] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x278] +Xx_4078: ADC R13, [RSP+-0x270] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x270] +Xx_4079: ADC RAX, [RSP+-0x268] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x268] +Xx_4080: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4081: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_4082: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_4083: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_4084: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_4085: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4086: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_4087: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4088: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4089: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4090: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_4091: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4092: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4093: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4094: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_4095: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4096: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4097: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4098: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_4099: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 
+Xx_4100: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4101: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4102: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4103: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4104: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4105: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4106: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4107: ADC R11, [RSP+-0x260] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x260] +Xx_4108: ADC R12, [RSP+-0x258] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x258] +Xx_4109: ADC R13, [RSP+-0x250] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x250] +Xx_4110: ADC RAX, [RSP+-0x248] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x248] +Xx_4111: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4112: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_4113: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_4114: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_4115: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_4116: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4117: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_4118: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4119: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4120: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4121: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_4122: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4123: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4124: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4125: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_4126: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4127: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4128: MOV RAX, RDI # mul_add_slow:207 
MOV RAX, `b +Xx_4129: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_4130: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4131: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4132: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4133: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4134: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4135: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4136: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4137: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4138: ADC R11, [RSP+-0x240] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x240] +Xx_4139: ADC R12, [RSP+-0x238] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x238] +Xx_4140: ADC R13, [RSP+-0x230] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x230] +Xx_4141: ADC RAX, [RSP+-0x228] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x228] +Xx_4142: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4143: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_4144: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_4145: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_4146: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_4147: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4148: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_4149: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4150: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4151: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4152: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_4153: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4154: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4155: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4156: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_4157: MOV R13, 
RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4158: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4159: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4160: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_4161: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4162: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4163: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4164: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4165: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4166: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4167: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4168: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4169: ADC R11, [RSP+-0x220] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x220] +Xx_4170: ADC R12, [RSP+-0x218] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x218] +Xx_4171: ADC R13, [RSP+-0x210] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x210] +Xx_4172: ADC RAX, [RSP+-0x208] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x208] +Xx_4173: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4174: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_4175: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_4176: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_4177: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_4178: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4179: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_4180: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4181: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4182: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4183: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_4184: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4185: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX 
+Xx_4186: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4187: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_4188: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4189: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4190: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4191: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_4192: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4193: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4194: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4195: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4196: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4197: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4198: ADC R11, [RSP+-0x200] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x200] +Xx_4199: ADC R12, [RSP+-0x1f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x1f8] +Xx_4200: ADC R13, [RSP+-0x1f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x1f0] +Xx_4201: ADC RAX, [RSP+-0x1e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x1e8] +Xx_4202: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_4203: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_4204: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_4205: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_4206: JMP _label_45 # gcd_unsigned:620 JMP _label_45 +Xx_4207: multiply_uv_size_16: # gcd_unsigned:490 multiply_uv_size_16: +Xx_4208: MOV RDI, [RSP+-0x388] # gcd_unsigned:531 MOV `tmp, `spill_parity +Xx_4209: CMP RDI, 0 # gcd_unsigned:532 CMP `tmp, 0 +Xx_4210: MOV RBX, [RSP+-0x3a8] # gcd_unsigned:535 MOV `addr_a, `spill_u_0 +Xx_4211: MOV RBP, [RSP+-0x398] # gcd_unsigned:536 MOV `addr_b, `spill_v_0 +Xx_4212: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_4213: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_4214: CMOVNE RBP, RSI # 
gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_4215: MOV [RSP+-0x348], RBX # gcd_unsigned:543 MOV `spill_mod_u_0, `addr_a +Xx_4216: MOV [RSP+-0x338], RBP # gcd_unsigned:544 MOV `spill_mod_v_0, `addr_b +Xx_4217: MOV RBX, [RSP+-0x3a0] # gcd_unsigned:535 MOV `addr_a, `spill_u_1 +Xx_4218: MOV RBP, [RSP+-0x390] # gcd_unsigned:536 MOV `addr_b, `spill_v_1 +Xx_4219: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_4220: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_4221: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_4222: MOV [RSP+-0x340], RBX # gcd_unsigned:543 MOV `spill_mod_u_1, `addr_a +Xx_4223: MOV [RSP+-0x330], RBP # gcd_unsigned:544 MOV `spill_mod_v_1, `addr_b +Xx_4224: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:547 MOV `addr_new, `spill_iter +Xx_4225: TEST RSI, 1 # gcd_unsigned:548 TEST `addr_new, 1 +Xx_4226: MOV RBX, [RSP+-0x3f8] # gcd_unsigned:551 MOV `addr_a, `spill_a_addr_base +Xx_4227: CMOVNZ RBX, [RSP+-0x3e8] # gcd_unsigned:552 CMOVNZ `addr_a, `spill_a_2_addr_base +Xx_4228: MOV RBP, [RSP+-0x3f0] # gcd_unsigned:555 MOV `addr_b, `spill_b_addr_base +Xx_4229: CMOVNZ RBP, [RSP+-0x3e0] # gcd_unsigned:556 CMOVNZ `addr_b, `spill_b_2_addr_base +Xx_4230: CMP RDI, 0 # gcd_unsigned:559 CMP `tmp, 0 +Xx_4231: MOV RSI, RBX # gcd_unsigned:560 MOV `addr_new, `addr_a +Xx_4232: CMOVNE RBX, RBP # gcd_unsigned:561 CMOVNE `addr_a, `addr_b +Xx_4233: CMOVNE RBP, RSI # gcd_unsigned:562 CMOVNE `addr_b, `addr_new +Xx_4234: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:567 MOV `addr_new, `spill_iter +Xx_4235: TEST RSI, 1 # gcd_unsigned:568 TEST `addr_new, 1 +Xx_4236: MOV RSI, [RSP+-0x3e0] # gcd_unsigned:569 MOV `addr_new, `spill_b_2_addr_base +Xx_4237: CMOVNZ RSI, [RSP+-0x3f0] # gcd_unsigned:570 CMOVNZ `addr_new, `spill_b_addr_base +Xx_4238: MOV [RSP+-0x328], RSI # gcd_unsigned:571 MOV `spill_addr_b_new, `addr_new +Xx_4239: MOV RSI, [RSP+-0x3e8] # gcd_unsigned:574 MOV `addr_new, `spill_a_2_addr_base +Xx_4240: CMOVNZ RSI, [RSP+-0x3f8] # gcd_unsigned:575 
CMOVNZ `addr_new, `spill_a_addr_base +Xx_4241: MOV RDI, [RSP+-0x338] # gcd_unsigned:604 MOV `tmp, `spill_mod_v_0 +Xx_4242: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4243: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_4244: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4245: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4246: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4247: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_4248: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4249: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4250: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4251: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_4252: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4253: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4254: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4255: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_4256: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_4257: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4258: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4259: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4260: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4261: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4262: MOV [RSP+-0x320], R11 # mul_add_slow:266 MOV [RSP+-0x320], `mul_low_0 +Xx_4263: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4264: MOV [RSP+-0x318], R12 # mul_add_slow:266 MOV [RSP+-0x318], `mul_low_1 +Xx_4265: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4266: MOV [RSP+-0x310], R13 # mul_add_slow:266 MOV [RSP+-0x310], `mul_low_2 +Xx_4267: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4268: MOV [RSP+-0x308], RAX # mul_add_slow:266 MOV [RSP+-0x308], `mul_low_3 +Xx_4269: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4270: MUL QWORD PTR [RBP+0x20] # 
mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_4271: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4272: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4273: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4274: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_4275: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4276: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4277: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4278: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_4279: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4280: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4281: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4282: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_4283: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4284: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4285: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4286: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4287: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4288: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4289: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4290: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4291: MOV [RSP+-0x300], R11 # mul_add_slow:266 MOV [RSP+-0x300], `mul_low_0 +Xx_4292: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4293: MOV [RSP+-0x2f8], R12 # mul_add_slow:266 MOV [RSP+-0x2f8], `mul_low_1 +Xx_4294: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4295: MOV [RSP+-0x2f0], R13 # mul_add_slow:266 MOV [RSP+-0x2f0], `mul_low_2 +Xx_4296: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4297: MOV [RSP+-0x2e8], RAX # mul_add_slow:266 MOV [RSP+-0x2e8], `mul_low_3 +Xx_4298: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4299: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_4300: MOV R11, RAX # 
mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4301: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4302: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4303: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_4304: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4305: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4306: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4307: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_4308: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4309: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4310: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4311: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_4312: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4313: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4314: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4315: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4316: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4317: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4318: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4319: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4320: MOV [RSP+-0x2e0], R11 # mul_add_slow:266 MOV [RSP+-0x2e0], `mul_low_0 +Xx_4321: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4322: MOV [RSP+-0x2d8], R12 # mul_add_slow:266 MOV [RSP+-0x2d8], `mul_low_1 +Xx_4323: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4324: MOV [RSP+-0x2d0], R13 # mul_add_slow:266 MOV [RSP+-0x2d0], `mul_low_2 +Xx_4325: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4326: MOV [RSP+-0x2c8], RAX # mul_add_slow:266 MOV [RSP+-0x2c8], `mul_low_3 +Xx_4327: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4328: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_4329: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4330: MOV R14, RDX # 
mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4331: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4332: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_4333: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4334: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4335: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4336: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_4337: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4338: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4339: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4340: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_4341: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4342: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4343: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4344: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4345: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4346: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4347: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4348: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4349: MOV [RSP+-0x2c0], R11 # mul_add_slow:266 MOV [RSP+-0x2c0], `mul_low_0 +Xx_4350: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4351: MOV [RSP+-0x2b8], R12 # mul_add_slow:266 MOV [RSP+-0x2b8], `mul_low_1 +Xx_4352: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4353: MOV [RSP+-0x2b0], R13 # mul_add_slow:266 MOV [RSP+-0x2b0], `mul_low_2 +Xx_4354: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4355: MOV [RSP+-0x2a8], RAX # mul_add_slow:266 MOV [RSP+-0x2a8], `mul_low_3 +Xx_4356: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4357: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_4358: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4359: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4360: MOV RAX, RDI # 
mul_add_slow:207 MOV RAX, `b +Xx_4361: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_4362: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4363: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4364: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4365: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_4366: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4367: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4368: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4369: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_4370: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4371: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4372: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4373: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4374: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4375: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4376: MOV [RSP+-0x2a0], R11 # mul_add_slow:266 MOV [RSP+-0x2a0], `mul_low_0 +Xx_4377: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4378: MOV [RSP+-0x298], R12 # mul_add_slow:266 MOV [RSP+-0x298], `mul_low_1 +Xx_4379: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4380: MOV [RSP+-0x290], R13 # mul_add_slow:266 MOV [RSP+-0x290], `mul_low_2 +Xx_4381: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4382: MOV [RSP+-0x288], RAX # mul_add_slow:266 MOV [RSP+-0x288], `mul_low_3 +Xx_4383: MOV RDI, [RSP+-0x340] # gcd_unsigned:608 MOV `tmp, `spill_mod_u_1 +Xx_4384: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4385: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_4386: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4387: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4388: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4389: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_4390: MOV R12, RAX # 
mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4391: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4392: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4393: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_4394: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4395: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4396: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4397: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_4398: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_4399: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4400: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4401: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4402: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4403: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4404: MOV [RSP+-0x280], R11 # mul_add_slow:266 MOV [RSP+-0x280], `mul_low_0 +Xx_4405: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4406: MOV [RSP+-0x278], R12 # mul_add_slow:266 MOV [RSP+-0x278], `mul_low_1 +Xx_4407: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4408: MOV [RSP+-0x270], R13 # mul_add_slow:266 MOV [RSP+-0x270], `mul_low_2 +Xx_4409: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4410: MOV [RSP+-0x268], RAX # mul_add_slow:266 MOV [RSP+-0x268], `mul_low_3 +Xx_4411: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4412: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_4413: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4414: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4415: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4416: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_4417: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4418: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4419: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4420: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 
MUL QWORD PTR [RBX+0x30] +Xx_4421: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4422: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4423: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4424: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_4425: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4426: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4427: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4428: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4429: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4430: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4431: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4432: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4433: MOV [RSP+-0x260], R11 # mul_add_slow:266 MOV [RSP+-0x260], `mul_low_0 +Xx_4434: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4435: MOV [RSP+-0x258], R12 # mul_add_slow:266 MOV [RSP+-0x258], `mul_low_1 +Xx_4436: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4437: MOV [RSP+-0x250], R13 # mul_add_slow:266 MOV [RSP+-0x250], `mul_low_2 +Xx_4438: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4439: MOV [RSP+-0x248], RAX # mul_add_slow:266 MOV [RSP+-0x248], `mul_low_3 +Xx_4440: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4441: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_4442: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4443: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4444: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4445: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_4446: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4447: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4448: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4449: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_4450: MOV R13, RAX # mul_add_slow:214 MOV 
`mul_low_2, RAX +Xx_4451: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4452: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4453: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_4454: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4455: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4456: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4457: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4458: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4459: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4460: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4461: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4462: MOV [RSP+-0x240], R11 # mul_add_slow:266 MOV [RSP+-0x240], `mul_low_0 +Xx_4463: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4464: MOV [RSP+-0x238], R12 # mul_add_slow:266 MOV [RSP+-0x238], `mul_low_1 +Xx_4465: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4466: MOV [RSP+-0x230], R13 # mul_add_slow:266 MOV [RSP+-0x230], `mul_low_2 +Xx_4467: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4468: MOV [RSP+-0x228], RAX # mul_add_slow:266 MOV [RSP+-0x228], `mul_low_3 +Xx_4469: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4470: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_4471: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4472: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4473: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4474: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_4475: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4476: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4477: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4478: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_4479: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4480: MOV RCX, RDX # mul_add_slow:215 MOV 
`mul_high_2, RDX +Xx_4481: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4482: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_4483: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4484: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4485: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4486: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4487: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4488: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4489: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4490: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4491: MOV [RSP+-0x220], R11 # mul_add_slow:266 MOV [RSP+-0x220], `mul_low_0 +Xx_4492: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4493: MOV [RSP+-0x218], R12 # mul_add_slow:266 MOV [RSP+-0x218], `mul_low_1 +Xx_4494: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4495: MOV [RSP+-0x210], R13 # mul_add_slow:266 MOV [RSP+-0x210], `mul_low_2 +Xx_4496: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4497: MOV [RSP+-0x208], RAX # mul_add_slow:266 MOV [RSP+-0x208], `mul_low_3 +Xx_4498: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4499: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_4500: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4501: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4502: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4503: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_4504: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4505: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4506: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4507: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_4508: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4509: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4510: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b 
+Xx_4511: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_4512: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4513: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4514: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4515: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4516: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4517: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4518: MOV [RSP+-0x200], R11 # mul_add_slow:266 MOV [RSP+-0x200], `mul_low_0 +Xx_4519: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4520: MOV [RSP+-0x1f8], R12 # mul_add_slow:266 MOV [RSP+-0x1f8], `mul_low_1 +Xx_4521: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4522: MOV [RSP+-0x1f0], R13 # mul_add_slow:266 MOV [RSP+-0x1f0], `mul_low_2 +Xx_4523: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4524: MOV [RSP+-0x1e8], RAX # mul_add_slow:266 MOV [RSP+-0x1e8], `mul_low_3 +Xx_4525: MOV RDI, [RSP+-0x348] # gcd_unsigned:612 MOV `tmp, `spill_mod_u_0 +Xx_4526: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4527: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_4528: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4529: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4530: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4531: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_4532: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4533: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4534: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4535: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_4536: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4537: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4538: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4539: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_4540: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, 
`mul_high_0 +Xx_4541: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4542: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4543: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4544: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4545: STC # mul_add_slow:243 STC +Xx_4546: ADC R11, [RSP+-0x320] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x320] +Xx_4547: ADC R12, [RSP+-0x318] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x318] +Xx_4548: ADC R13, [RSP+-0x310] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x310] +Xx_4549: ADC RAX, [RSP+-0x308] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x308] +Xx_4550: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4551: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_4552: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_4553: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_4554: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_4555: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4556: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_4557: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4558: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4559: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4560: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_4561: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4562: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4563: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4564: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_4565: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4566: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4567: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4568: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_4569: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4570: 
ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4571: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4572: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4573: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4574: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4575: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4576: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4577: ADC R11, [RSP+-0x300] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x300] +Xx_4578: ADC R12, [RSP+-0x2f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2f8] +Xx_4579: ADC R13, [RSP+-0x2f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2f0] +Xx_4580: ADC RAX, [RSP+-0x2e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2e8] +Xx_4581: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4582: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_4583: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_4584: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_4585: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_4586: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4587: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_4588: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4589: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4590: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4591: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_4592: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4593: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4594: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4595: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_4596: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4597: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4598: MOV RAX, RDI # mul_add_slow:207 MOV RAX, 
`b +Xx_4599: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_4600: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4601: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4602: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4603: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4604: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4605: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4606: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4607: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4608: ADC R11, [RSP+-0x2e0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2e0] +Xx_4609: ADC R12, [RSP+-0x2d8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2d8] +Xx_4610: ADC R13, [RSP+-0x2d0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2d0] +Xx_4611: ADC RAX, [RSP+-0x2c8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2c8] +Xx_4612: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4613: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_4614: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_4615: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_4616: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_4617: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4618: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_4619: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4620: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4621: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4622: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_4623: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4624: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4625: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4626: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_4627: MOV R13, RAX # 
mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4628: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4629: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4630: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_4631: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4632: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4633: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4634: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4635: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4636: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4637: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4638: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4639: ADC R11, [RSP+-0x2c0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2c0] +Xx_4640: ADC R12, [RSP+-0x2b8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2b8] +Xx_4641: ADC R13, [RSP+-0x2b0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2b0] +Xx_4642: ADC RAX, [RSP+-0x2a8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2a8] +Xx_4643: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4644: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_4645: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_4646: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_4647: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_4648: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4649: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_4650: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4651: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4652: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4653: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_4654: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4655: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4656: 
MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4657: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_4658: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4659: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4660: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4661: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_4662: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4663: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4664: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4665: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4666: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4667: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4668: ADC R11, [RSP+-0x2a0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2a0] +Xx_4669: ADC R12, [RSP+-0x298] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x298] +Xx_4670: ADC R13, [RSP+-0x290] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x290] +Xx_4671: ADC RAX, [RSP+-0x288] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x288] +Xx_4672: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_4673: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_4674: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_4675: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_4676: MOV RSI, [RSP+-0x328] # gcd_unsigned:616 MOV `addr_new, `spill_addr_b_new +Xx_4677: MOV RDI, [RSP+-0x330] # gcd_unsigned:617 MOV `tmp, `spill_mod_v_1 +Xx_4678: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4679: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_4680: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4681: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4682: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4683: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_4684: MOV R12, RAX # mul_add_slow:214 
MOV `mul_low_1, RAX +Xx_4685: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4686: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4687: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_4688: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4689: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4690: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4691: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_4692: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_4693: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4694: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4695: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4696: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4697: STC # mul_add_slow:243 STC +Xx_4698: ADC R11, [RSP+-0x280] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x280] +Xx_4699: ADC R12, [RSP+-0x278] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x278] +Xx_4700: ADC R13, [RSP+-0x270] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x270] +Xx_4701: ADC RAX, [RSP+-0x268] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x268] +Xx_4702: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4703: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_4704: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_4705: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_4706: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_4707: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4708: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_4709: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4710: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4711: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4712: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_4713: MOV R12, RAX # mul_add_slow:214 MOV 
`mul_low_1, RAX +Xx_4714: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4715: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4716: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_4717: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4718: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4719: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4720: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_4721: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4722: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4723: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4724: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4725: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4726: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4727: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4728: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4729: ADC R11, [RSP+-0x260] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x260] +Xx_4730: ADC R12, [RSP+-0x258] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x258] +Xx_4731: ADC R13, [RSP+-0x250] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x250] +Xx_4732: ADC RAX, [RSP+-0x248] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x248] +Xx_4733: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4734: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_4735: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_4736: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_4737: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_4738: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4739: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_4740: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4741: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4742: MOV RAX, RDI # 
mul_add_slow:207 MOV RAX, `b +Xx_4743: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_4744: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4745: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4746: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4747: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_4748: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4749: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4750: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4751: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_4752: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4753: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4754: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4755: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4756: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4757: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4758: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4759: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4760: ADC R11, [RSP+-0x240] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x240] +Xx_4761: ADC R12, [RSP+-0x238] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x238] +Xx_4762: ADC R13, [RSP+-0x230] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x230] +Xx_4763: ADC RAX, [RSP+-0x228] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x228] +Xx_4764: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4765: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_4766: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_4767: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_4768: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_4769: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4770: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] 
+Xx_4771: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4772: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4773: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4774: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_4775: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4776: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4777: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4778: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_4779: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4780: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4781: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4782: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_4783: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4784: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4785: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4786: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4787: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4788: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4789: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4790: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4791: ADC R11, [RSP+-0x220] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x220] +Xx_4792: ADC R12, [RSP+-0x218] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x218] +Xx_4793: ADC R13, [RSP+-0x210] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x210] +Xx_4794: ADC RAX, [RSP+-0x208] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x208] +Xx_4795: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_4796: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_4797: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_4798: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_4799: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV 
[RSI+0x78], `mul_low_3 +Xx_4800: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4801: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_4802: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4803: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4804: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4805: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_4806: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4807: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4808: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4809: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_4810: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4811: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4812: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4813: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_4814: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4815: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4816: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4817: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4818: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4819: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_4820: ADC R11, [RSP+-0x200] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x200] +Xx_4821: ADC R12, [RSP+-0x1f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x1f8] +Xx_4822: ADC R13, [RSP+-0x1f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x1f0] +Xx_4823: ADC RAX, [RSP+-0x1e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x1e8] +Xx_4824: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_4825: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_4826: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_4827: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_4828: JMP _label_45 # gcd_unsigned:620 
JMP _label_45 +Xx_4829: multiply_uv_size_20: # gcd_unsigned:490 multiply_uv_size_20: +Xx_4830: MOV RDI, [RSP+-0x388] # gcd_unsigned:531 MOV `tmp, `spill_parity +Xx_4831: CMP RDI, 0 # gcd_unsigned:532 CMP `tmp, 0 +Xx_4832: MOV RBX, [RSP+-0x3a8] # gcd_unsigned:535 MOV `addr_a, `spill_u_0 +Xx_4833: MOV RBP, [RSP+-0x398] # gcd_unsigned:536 MOV `addr_b, `spill_v_0 +Xx_4834: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_4835: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_4836: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_4837: MOV [RSP+-0x348], RBX # gcd_unsigned:543 MOV `spill_mod_u_0, `addr_a +Xx_4838: MOV [RSP+-0x338], RBP # gcd_unsigned:544 MOV `spill_mod_v_0, `addr_b +Xx_4839: MOV RBX, [RSP+-0x3a0] # gcd_unsigned:535 MOV `addr_a, `spill_u_1 +Xx_4840: MOV RBP, [RSP+-0x390] # gcd_unsigned:536 MOV `addr_b, `spill_v_1 +Xx_4841: MOV RSI, RBX # gcd_unsigned:539 MOV `addr_new, `addr_a +Xx_4842: CMOVNE RBX, RBP # gcd_unsigned:540 CMOVNE `addr_a, `addr_b +Xx_4843: CMOVNE RBP, RSI # gcd_unsigned:541 CMOVNE `addr_b, `addr_new +Xx_4844: MOV [RSP+-0x340], RBX # gcd_unsigned:543 MOV `spill_mod_u_1, `addr_a +Xx_4845: MOV [RSP+-0x330], RBP # gcd_unsigned:544 MOV `spill_mod_v_1, `addr_b +Xx_4846: MOV RSI, [RSP+-0x3b8] # gcd_unsigned:547 MOV `addr_new, `spill_iter +Xx_4847: TEST RSI, 1 # gcd_unsigned:548 TEST `addr_new, 1 +Xx_4848: MOV RBX, [RSP+-0x3f8] # gcd_unsigned:551 MOV `addr_a, `spill_a_addr_base +Xx_4849: CMOVNZ RBX, [RSP+-0x3e8] # gcd_unsigned:552 CMOVNZ `addr_a, `spill_a_2_addr_base +Xx_4850: MOV RBP, [RSP+-0x3f0] # gcd_unsigned:555 MOV `addr_b, `spill_b_addr_base +Xx_4851: CMOVNZ RBP, [RSP+-0x3e0] # gcd_unsigned:556 CMOVNZ `addr_b, `spill_b_2_addr_base +Xx_4852: CMP RDI, 0 # gcd_unsigned:559 CMP `tmp, 0 +Xx_4853: MOV RSI, RBX # gcd_unsigned:560 MOV `addr_new, `addr_a +Xx_4854: CMOVNE RBX, RBP # gcd_unsigned:561 CMOVNE `addr_a, `addr_b +Xx_4855: CMOVNE RBP, RSI # gcd_unsigned:562 CMOVNE `addr_b, `addr_new +Xx_4856: MOV RSI, 
[RSP+-0x3b8] # gcd_unsigned:567 MOV `addr_new, `spill_iter +Xx_4857: TEST RSI, 1 # gcd_unsigned:568 TEST `addr_new, 1 +Xx_4858: MOV RSI, [RSP+-0x3e0] # gcd_unsigned:569 MOV `addr_new, `spill_b_2_addr_base +Xx_4859: CMOVNZ RSI, [RSP+-0x3f0] # gcd_unsigned:570 CMOVNZ `addr_new, `spill_b_addr_base +Xx_4860: MOV [RSP+-0x328], RSI # gcd_unsigned:571 MOV `spill_addr_b_new, `addr_new +Xx_4861: MOV RSI, [RSP+-0x3e8] # gcd_unsigned:574 MOV `addr_new, `spill_a_2_addr_base +Xx_4862: CMOVNZ RSI, [RSP+-0x3f8] # gcd_unsigned:575 CMOVNZ `addr_new, `spill_a_addr_base +Xx_4863: MOV RDI, [RSP+-0x338] # gcd_unsigned:604 MOV `tmp, `spill_mod_v_0 +Xx_4864: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4865: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_4866: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4867: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4868: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4869: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_4870: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4871: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4872: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4873: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_4874: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4875: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4876: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4877: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_4878: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_4879: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4880: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4881: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4882: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4883: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4884: MOV [RSP+-0x320], R11 # mul_add_slow:266 
MOV [RSP+-0x320], `mul_low_0 +Xx_4885: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4886: MOV [RSP+-0x318], R12 # mul_add_slow:266 MOV [RSP+-0x318], `mul_low_1 +Xx_4887: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4888: MOV [RSP+-0x310], R13 # mul_add_slow:266 MOV [RSP+-0x310], `mul_low_2 +Xx_4889: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4890: MOV [RSP+-0x308], RAX # mul_add_slow:266 MOV [RSP+-0x308], `mul_low_3 +Xx_4891: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4892: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_4893: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4894: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4895: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4896: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_4897: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4898: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4899: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4900: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_4901: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4902: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4903: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4904: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_4905: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4906: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4907: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4908: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4909: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4910: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4911: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4912: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4913: MOV [RSP+-0x300], R11 # mul_add_slow:266 MOV [RSP+-0x300], `mul_low_0 +Xx_4914: NOT R12 # mul_add_slow:264 NOT 
`mul_low_1 +Xx_4915: MOV [RSP+-0x2f8], R12 # mul_add_slow:266 MOV [RSP+-0x2f8], `mul_low_1 +Xx_4916: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4917: MOV [RSP+-0x2f0], R13 # mul_add_slow:266 MOV [RSP+-0x2f0], `mul_low_2 +Xx_4918: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4919: MOV [RSP+-0x2e8], RAX # mul_add_slow:266 MOV [RSP+-0x2e8], `mul_low_3 +Xx_4920: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4921: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_4922: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4923: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4924: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4925: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_4926: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4927: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4928: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4929: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_4930: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4931: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4932: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4933: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_4934: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4935: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4936: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4937: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4938: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4939: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4940: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4941: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4942: MOV [RSP+-0x2e0], R11 # mul_add_slow:266 MOV [RSP+-0x2e0], `mul_low_0 +Xx_4943: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4944: MOV [RSP+-0x2d8], R12 # mul_add_slow:266 MOV 
[RSP+-0x2d8], `mul_low_1 +Xx_4945: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_4946: MOV [RSP+-0x2d0], R13 # mul_add_slow:266 MOV [RSP+-0x2d0], `mul_low_2 +Xx_4947: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4948: MOV [RSP+-0x2c8], RAX # mul_add_slow:266 MOV [RSP+-0x2c8], `mul_low_3 +Xx_4949: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4950: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_4951: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4952: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4953: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4954: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_4955: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4956: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4957: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4958: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_4959: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4960: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4961: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4962: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_4963: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4964: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4965: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4966: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4967: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4968: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_4969: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_4970: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4971: MOV [RSP+-0x2c0], R11 # mul_add_slow:266 MOV [RSP+-0x2c0], `mul_low_0 +Xx_4972: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_4973: MOV [RSP+-0x2b8], R12 # mul_add_slow:266 MOV [RSP+-0x2b8], `mul_low_1 +Xx_4974: NOT R13 # mul_add_slow:264 NOT 
`mul_low_2 +Xx_4975: MOV [RSP+-0x2b0], R13 # mul_add_slow:266 MOV [RSP+-0x2b0], `mul_low_2 +Xx_4976: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_4977: MOV [RSP+-0x2a8], RAX # mul_add_slow:266 MOV [RSP+-0x2a8], `mul_low_3 +Xx_4978: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4979: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_4980: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_4981: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_4982: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4983: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_4984: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_4985: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_4986: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4987: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_4988: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_4989: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_4990: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_4991: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_4992: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_4993: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_4994: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_4995: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_4996: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_4997: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_4998: MOV [RSP+-0x2a0], R11 # mul_add_slow:266 MOV [RSP+-0x2a0], `mul_low_0 +Xx_4999: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5000: MOV [RSP+-0x298], R12 # mul_add_slow:266 MOV [RSP+-0x298], `mul_low_1 +Xx_5001: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5002: MOV [RSP+-0x290], R13 # mul_add_slow:266 MOV [RSP+-0x290], `mul_low_2 +Xx_5003: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5004: MOV [RSP+-0x288], RAX # mul_add_slow:266 MOV [RSP+-0x288], 
`mul_low_3 +Xx_5005: MOV RDI, [RSP+-0x340] # gcd_unsigned:608 MOV `tmp, `spill_mod_u_1 +Xx_5006: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5007: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_5008: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5009: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5010: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5011: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_5012: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5013: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5014: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5015: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_5016: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5017: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5018: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5019: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_5020: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_5021: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5022: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5023: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5024: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5025: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_5026: MOV [RSP+-0x280], R11 # mul_add_slow:266 MOV [RSP+-0x280], `mul_low_0 +Xx_5027: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5028: MOV [RSP+-0x278], R12 # mul_add_slow:266 MOV [RSP+-0x278], `mul_low_1 +Xx_5029: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5030: MOV [RSP+-0x270], R13 # mul_add_slow:266 MOV [RSP+-0x270], `mul_low_2 +Xx_5031: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5032: MOV [RSP+-0x268], RAX # mul_add_slow:266 MOV [RSP+-0x268], `mul_low_3 +Xx_5033: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5034: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR 
[RBX+0x20] +Xx_5035: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5036: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5037: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5038: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_5039: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5040: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5041: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5042: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_5043: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5044: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5045: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5046: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_5047: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5048: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5049: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5050: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5051: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5052: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5053: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5054: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_5055: MOV [RSP+-0x260], R11 # mul_add_slow:266 MOV [RSP+-0x260], `mul_low_0 +Xx_5056: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5057: MOV [RSP+-0x258], R12 # mul_add_slow:266 MOV [RSP+-0x258], `mul_low_1 +Xx_5058: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5059: MOV [RSP+-0x250], R13 # mul_add_slow:266 MOV [RSP+-0x250], `mul_low_2 +Xx_5060: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5061: MOV [RSP+-0x248], RAX # mul_add_slow:266 MOV [RSP+-0x248], `mul_low_3 +Xx_5062: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5063: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_5064: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, 
RAX +Xx_5065: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5066: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5067: MUL QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_5068: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5069: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5070: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5071: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_5072: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5073: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5074: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5075: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_5076: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5077: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5078: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5079: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5080: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5081: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5082: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5083: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_5084: MOV [RSP+-0x240], R11 # mul_add_slow:266 MOV [RSP+-0x240], `mul_low_0 +Xx_5085: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5086: MOV [RSP+-0x238], R12 # mul_add_slow:266 MOV [RSP+-0x238], `mul_low_1 +Xx_5087: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5088: MOV [RSP+-0x230], R13 # mul_add_slow:266 MOV [RSP+-0x230], `mul_low_2 +Xx_5089: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5090: MOV [RSP+-0x228], RAX # mul_add_slow:266 MOV [RSP+-0x228], `mul_low_3 +Xx_5091: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5092: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_5093: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5094: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX 
+Xx_5095: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5096: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_5097: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5098: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5099: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5100: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_5101: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5102: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5103: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5104: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_5105: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5106: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5107: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5108: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5109: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5110: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5111: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5112: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_5113: MOV [RSP+-0x220], R11 # mul_add_slow:266 MOV [RSP+-0x220], `mul_low_0 +Xx_5114: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5115: MOV [RSP+-0x218], R12 # mul_add_slow:266 MOV [RSP+-0x218], `mul_low_1 +Xx_5116: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5117: MOV [RSP+-0x210], R13 # mul_add_slow:266 MOV [RSP+-0x210], `mul_low_2 +Xx_5118: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5119: MOV [RSP+-0x208], RAX # mul_add_slow:266 MOV [RSP+-0x208], `mul_low_3 +Xx_5120: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5121: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_5122: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5123: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5124: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5125: MUL 
QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_5126: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5127: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5128: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5129: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_5130: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5131: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5132: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5133: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_5134: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5135: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5136: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5137: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5138: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5139: NOT R11 # mul_add_slow:264 NOT `mul_low_0 +Xx_5140: MOV [RSP+-0x200], R11 # mul_add_slow:266 MOV [RSP+-0x200], `mul_low_0 +Xx_5141: NOT R12 # mul_add_slow:264 NOT `mul_low_1 +Xx_5142: MOV [RSP+-0x1f8], R12 # mul_add_slow:266 MOV [RSP+-0x1f8], `mul_low_1 +Xx_5143: NOT R13 # mul_add_slow:264 NOT `mul_low_2 +Xx_5144: MOV [RSP+-0x1f0], R13 # mul_add_slow:266 MOV [RSP+-0x1f0], `mul_low_2 +Xx_5145: NOT RAX # mul_add_slow:264 NOT `mul_low_3 +Xx_5146: MOV [RSP+-0x1e8], RAX # mul_add_slow:266 MOV [RSP+-0x1e8], `mul_low_3 +Xx_5147: MOV RDI, [RSP+-0x348] # gcd_unsigned:612 MOV `tmp, `spill_mod_u_0 +Xx_5148: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5149: MUL QWORD PTR [RBX+0x0] # mul_add_slow:208 MUL QWORD PTR [RBX+0x0] +Xx_5150: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5151: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5152: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5153: MUL QWORD PTR [RBX+0x8] # mul_add_slow:208 MUL QWORD PTR [RBX+0x8] +Xx_5154: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5155: MOV R15, 
RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5156: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5157: MUL QWORD PTR [RBX+0x10] # mul_add_slow:208 MUL QWORD PTR [RBX+0x10] +Xx_5158: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5159: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5160: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5161: MUL QWORD PTR [RBX+0x18] # mul_add_slow:208 MUL QWORD PTR [RBX+0x18] +Xx_5162: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_5163: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5164: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5165: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5166: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5167: STC # mul_add_slow:243 STC +Xx_5168: ADC R11, [RSP+-0x320] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x320] +Xx_5169: ADC R12, [RSP+-0x318] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x318] +Xx_5170: ADC R13, [RSP+-0x310] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x310] +Xx_5171: ADC RAX, [RSP+-0x308] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x308] +Xx_5172: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5173: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_5174: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_5175: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_5176: MOV [RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_5177: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5178: MUL QWORD PTR [RBX+0x20] # mul_add_slow:208 MUL QWORD PTR [RBX+0x20] +Xx_5179: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5180: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5181: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5182: MUL QWORD PTR [RBX+0x28] # mul_add_slow:208 MUL QWORD PTR [RBX+0x28] +Xx_5183: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5184: MOV R15, RDX # 
mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5185: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5186: MUL QWORD PTR [RBX+0x30] # mul_add_slow:208 MUL QWORD PTR [RBX+0x30] +Xx_5187: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5188: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5189: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5190: MUL QWORD PTR [RBX+0x38] # mul_add_slow:208 MUL QWORD PTR [RBX+0x38] +Xx_5191: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5192: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5193: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5194: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5195: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5196: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5197: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5198: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5199: ADC R11, [RSP+-0x300] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x300] +Xx_5200: ADC R12, [RSP+-0x2f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2f8] +Xx_5201: ADC R13, [RSP+-0x2f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2f0] +Xx_5202: ADC RAX, [RSP+-0x2e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2e8] +Xx_5203: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5204: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_5205: MOV [RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_5206: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_5207: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_5208: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5209: MUL QWORD PTR [RBX+0x40] # mul_add_slow:208 MUL QWORD PTR [RBX+0x40] +Xx_5210: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5211: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5212: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5213: MUL 
QWORD PTR [RBX+0x48] # mul_add_slow:208 MUL QWORD PTR [RBX+0x48] +Xx_5214: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5215: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5216: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5217: MUL QWORD PTR [RBX+0x50] # mul_add_slow:208 MUL QWORD PTR [RBX+0x50] +Xx_5218: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5219: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5220: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5221: MUL QWORD PTR [RBX+0x58] # mul_add_slow:208 MUL QWORD PTR [RBX+0x58] +Xx_5222: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5223: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5224: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5225: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5226: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5227: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5228: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5229: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5230: ADC R11, [RSP+-0x2e0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2e0] +Xx_5231: ADC R12, [RSP+-0x2d8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2d8] +Xx_5232: ADC R13, [RSP+-0x2d0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2d0] +Xx_5233: ADC RAX, [RSP+-0x2c8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2c8] +Xx_5234: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5235: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_5236: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_5237: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_5238: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_5239: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5240: MUL QWORD PTR [RBX+0x60] # mul_add_slow:208 MUL QWORD PTR [RBX+0x60] +Xx_5241: MOV R11, RAX # mul_add_slow:214 
MOV `mul_low_0, RAX +Xx_5242: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5243: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5244: MUL QWORD PTR [RBX+0x68] # mul_add_slow:208 MUL QWORD PTR [RBX+0x68] +Xx_5245: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5246: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5247: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5248: MUL QWORD PTR [RBX+0x70] # mul_add_slow:208 MUL QWORD PTR [RBX+0x70] +Xx_5249: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5250: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5251: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5252: MUL QWORD PTR [RBX+0x78] # mul_add_slow:208 MUL QWORD PTR [RBX+0x78] +Xx_5253: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5254: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5255: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5256: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5257: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5258: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5259: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5260: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5261: ADC R11, [RSP+-0x2c0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2c0] +Xx_5262: ADC R12, [RSP+-0x2b8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x2b8] +Xx_5263: ADC R13, [RSP+-0x2b0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x2b0] +Xx_5264: ADC RAX, [RSP+-0x2a8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x2a8] +Xx_5265: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5266: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_5267: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_5268: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_5269: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_5270: MOV RAX, RDI # 
mul_add_slow:207 MOV RAX, `b +Xx_5271: MUL QWORD PTR [RBX+0x80] # mul_add_slow:208 MUL QWORD PTR [RBX+0x80] +Xx_5272: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5273: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5274: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5275: MUL QWORD PTR [RBX+0x88] # mul_add_slow:208 MUL QWORD PTR [RBX+0x88] +Xx_5276: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5277: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5278: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5279: MUL QWORD PTR [RBX+0x90] # mul_add_slow:208 MUL QWORD PTR [RBX+0x90] +Xx_5280: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5281: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5282: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5283: MUL QWORD PTR [RBX+0x98] # mul_add_slow:208 MUL QWORD PTR [RBX+0x98] +Xx_5284: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5285: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5286: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5287: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5288: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5289: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5290: ADC R11, [RSP+-0x2a0] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x2a0] +Xx_5291: ADC R12, [RSP+-0x298] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x298] +Xx_5292: ADC R13, [RSP+-0x290] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x290] +Xx_5293: ADC RAX, [RSP+-0x288] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x288] +Xx_5294: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_5295: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_5296: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_5297: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_5298: MOV RSI, [RSP+-0x328] # gcd_unsigned:616 MOV `addr_new, `spill_addr_b_new 
+Xx_5299: MOV RDI, [RSP+-0x330] # gcd_unsigned:617 MOV `tmp, `spill_mod_v_1 +Xx_5300: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5301: MUL QWORD PTR [RBP+0x0] # mul_add_slow:208 MUL QWORD PTR [RBP+0x0] +Xx_5302: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5303: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5304: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5305: MUL QWORD PTR [RBP+0x8] # mul_add_slow:208 MUL QWORD PTR [RBP+0x8] +Xx_5306: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5307: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5308: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5309: MUL QWORD PTR [RBP+0x10] # mul_add_slow:208 MUL QWORD PTR [RBP+0x10] +Xx_5310: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5311: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5312: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5313: MUL QWORD PTR [RBP+0x18] # mul_add_slow:208 MUL QWORD PTR [RBP+0x18] +Xx_5314: ADD R12, R14 # mul_add_slow:222 ADD `mul_low_1, `mul_high_0 +Xx_5315: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5316: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5317: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5318: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5319: STC # mul_add_slow:243 STC +Xx_5320: ADC R11, [RSP+-0x280] # mul_add_slow:244 ADC `mul_low_0, [RSP+-0x280] +Xx_5321: ADC R12, [RSP+-0x278] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x278] +Xx_5322: ADC R13, [RSP+-0x270] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x270] +Xx_5323: ADC RAX, [RSP+-0x268] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x268] +Xx_5324: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5325: MOV [RSI+0x0], R11 # mul_add_slow:266 MOV [RSI+0x0], `mul_low_0 +Xx_5326: MOV [RSI+0x8], R12 # mul_add_slow:266 MOV [RSI+0x8], `mul_low_1 +Xx_5327: MOV [RSI+0x10], R13 # mul_add_slow:266 MOV [RSI+0x10], `mul_low_2 +Xx_5328: MOV 
[RSI+0x18], RAX # mul_add_slow:266 MOV [RSI+0x18], `mul_low_3 +Xx_5329: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5330: MUL QWORD PTR [RBP+0x20] # mul_add_slow:208 MUL QWORD PTR [RBP+0x20] +Xx_5331: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5332: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5333: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5334: MUL QWORD PTR [RBP+0x28] # mul_add_slow:208 MUL QWORD PTR [RBP+0x28] +Xx_5335: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5336: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5337: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5338: MUL QWORD PTR [RBP+0x30] # mul_add_slow:208 MUL QWORD PTR [RBP+0x30] +Xx_5339: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5340: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5341: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5342: MUL QWORD PTR [RBP+0x38] # mul_add_slow:208 MUL QWORD PTR [RBP+0x38] +Xx_5343: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5344: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5345: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5346: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5347: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5348: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5349: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5350: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5351: ADC R11, [RSP+-0x260] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x260] +Xx_5352: ADC R12, [RSP+-0x258] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x258] +Xx_5353: ADC R13, [RSP+-0x250] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x250] +Xx_5354: ADC RAX, [RSP+-0x248] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x248] +Xx_5355: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5356: MOV [RSI+0x20], R11 # mul_add_slow:266 MOV [RSI+0x20], `mul_low_0 +Xx_5357: MOV 
[RSI+0x28], R12 # mul_add_slow:266 MOV [RSI+0x28], `mul_low_1 +Xx_5358: MOV [RSI+0x30], R13 # mul_add_slow:266 MOV [RSI+0x30], `mul_low_2 +Xx_5359: MOV [RSI+0x38], RAX # mul_add_slow:266 MOV [RSI+0x38], `mul_low_3 +Xx_5360: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5361: MUL QWORD PTR [RBP+0x40] # mul_add_slow:208 MUL QWORD PTR [RBP+0x40] +Xx_5362: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5363: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5364: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5365: MUL QWORD PTR [RBP+0x48] # mul_add_slow:208 MUL QWORD PTR [RBP+0x48] +Xx_5366: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5367: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5368: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5369: MUL QWORD PTR [RBP+0x50] # mul_add_slow:208 MUL QWORD PTR [RBP+0x50] +Xx_5370: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5371: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5372: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5373: MUL QWORD PTR [RBP+0x58] # mul_add_slow:208 MUL QWORD PTR [RBP+0x58] +Xx_5374: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5375: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5376: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5377: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5378: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5379: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5380: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5381: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5382: ADC R11, [RSP+-0x240] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x240] +Xx_5383: ADC R12, [RSP+-0x238] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x238] +Xx_5384: ADC R13, [RSP+-0x230] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x230] +Xx_5385: ADC RAX, [RSP+-0x228] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x228] 
+Xx_5386: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5387: MOV [RSI+0x40], R11 # mul_add_slow:266 MOV [RSI+0x40], `mul_low_0 +Xx_5388: MOV [RSI+0x48], R12 # mul_add_slow:266 MOV [RSI+0x48], `mul_low_1 +Xx_5389: MOV [RSI+0x50], R13 # mul_add_slow:266 MOV [RSI+0x50], `mul_low_2 +Xx_5390: MOV [RSI+0x58], RAX # mul_add_slow:266 MOV [RSI+0x58], `mul_low_3 +Xx_5391: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5392: MUL QWORD PTR [RBP+0x60] # mul_add_slow:208 MUL QWORD PTR [RBP+0x60] +Xx_5393: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5394: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5395: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5396: MUL QWORD PTR [RBP+0x68] # mul_add_slow:208 MUL QWORD PTR [RBP+0x68] +Xx_5397: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5398: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5399: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5400: MUL QWORD PTR [RBP+0x70] # mul_add_slow:208 MUL QWORD PTR [RBP+0x70] +Xx_5401: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5402: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5403: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5404: MUL QWORD PTR [RBP+0x78] # mul_add_slow:208 MUL QWORD PTR [RBP+0x78] +Xx_5405: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5406: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5407: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5408: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5409: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5410: MOV R10, RDX # mul_add_slow:233 MOV `mul_high_4_previous, `mul_high_3 +Xx_5411: SBB R8, R8 # mul_add_slow:234 SBB `mul_carry, `mul_carry +Xx_5412: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5413: ADC R11, [RSP+-0x220] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x220] +Xx_5414: ADC R12, [RSP+-0x218] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x218] +Xx_5415: 
ADC R13, [RSP+-0x210] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x210] +Xx_5416: ADC RAX, [RSP+-0x208] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x208] +Xx_5417: SBB R9, R9 # mul_add_slow:258 SBB `add_carry, `add_carry +Xx_5418: MOV [RSI+0x60], R11 # mul_add_slow:266 MOV [RSI+0x60], `mul_low_0 +Xx_5419: MOV [RSI+0x68], R12 # mul_add_slow:266 MOV [RSI+0x68], `mul_low_1 +Xx_5420: MOV [RSI+0x70], R13 # mul_add_slow:266 MOV [RSI+0x70], `mul_low_2 +Xx_5421: MOV [RSI+0x78], RAX # mul_add_slow:266 MOV [RSI+0x78], `mul_low_3 +Xx_5422: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5423: MUL QWORD PTR [RBP+0x80] # mul_add_slow:208 MUL QWORD PTR [RBP+0x80] +Xx_5424: MOV R11, RAX # mul_add_slow:214 MOV `mul_low_0, RAX +Xx_5425: MOV R14, RDX # mul_add_slow:215 MOV `mul_high_0, RDX +Xx_5426: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5427: MUL QWORD PTR [RBP+0x88] # mul_add_slow:208 MUL QWORD PTR [RBP+0x88] +Xx_5428: MOV R12, RAX # mul_add_slow:214 MOV `mul_low_1, RAX +Xx_5429: MOV R15, RDX # mul_add_slow:215 MOV `mul_high_1, RDX +Xx_5430: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5431: MUL QWORD PTR [RBP+0x90] # mul_add_slow:208 MUL QWORD PTR [RBP+0x90] +Xx_5432: MOV R13, RAX # mul_add_slow:214 MOV `mul_low_2, RAX +Xx_5433: MOV RCX, RDX # mul_add_slow:215 MOV `mul_high_2, RDX +Xx_5434: MOV RAX, RDI # mul_add_slow:207 MOV RAX, `b +Xx_5435: MUL QWORD PTR [RBP+0x98] # mul_add_slow:208 MUL QWORD PTR [RBP+0x98] +Xx_5436: ADD R8, 1 # mul_add_slow:224 ADD `mul_carry, 1 +Xx_5437: ADC R11, R10 # mul_add_slow:225 ADC `mul_low_0, `mul_high_4_previous +Xx_5438: ADC R12, R14 # mul_add_slow:226 ADC `mul_low_1, `mul_high_0 +Xx_5439: ADC R13, R15 # mul_add_slow:229 ADC `mul_low_2, `mul_high_1 +Xx_5440: ADC RAX, RCX # mul_add_slow:230 ADC `mul_low_3, `mul_high_2 +Xx_5441: ADD R9, 1 # mul_add_slow:249 ADD `add_carry, 1 +Xx_5442: ADC R11, [RSP+-0x200] # mul_add_slow:250 ADC `mul_low_0, [RSP+-0x200] +Xx_5443: ADC R12, [RSP+-0x1f8] # mul_add_slow:254 ADC `mul_low_1, [RSP+-0x1f8] 
+Xx_5444: ADC R13, [RSP+-0x1f0] # mul_add_slow:254 ADC `mul_low_2, [RSP+-0x1f0] +Xx_5445: ADC RAX, [RSP+-0x1e8] # mul_add_slow:254 ADC `mul_low_3, [RSP+-0x1e8] +Xx_5446: MOV [RSI+0x80], R11 # mul_add_slow:266 MOV [RSI+0x80], `mul_low_0 +Xx_5447: MOV [RSI+0x88], R12 # mul_add_slow:266 MOV [RSI+0x88], `mul_low_1 +Xx_5448: MOV [RSI+0x90], R13 # mul_add_slow:266 MOV [RSI+0x90], `mul_low_2 +Xx_5449: MOV [RSI+0x98], RAX # mul_add_slow:266 MOV [RSI+0x98], `mul_low_3 +Xx_5450: JMP _label_45 # gcd_unsigned:620 JMP _label_45 +Xx_5451: _label_45: # gcd_unsigned:622 _label_45: +Xx_5452: _label_31: # gcd_unsigned:634 _label_31: +Xx_5453: MOV RBX, [RSP+-0x3b8] # gcd_unsigned:645 MOV `iter, `spill_iter +Xx_5454: TEST RBX, 1 # gcd_unsigned:648 TEST `iter, 1 +Xx_5455: MOV RAX, [RSP+-0x3e8] # gcd_unsigned:649 MOV `addr_a, `spill_a_2_addr_base +Xx_5456: CMOVNZ RAX, [RSP+-0x3f8] # gcd_unsigned:650 CMOVNZ `addr_a, `spill_a_addr_base +Xx_5457: MOV RDX, [RSP+-0x3e0] # gcd_unsigned:653 MOV `addr_b, `spill_b_2_addr_base +Xx_5458: CMOVNZ RDX, [RSP+-0x3f0] # gcd_unsigned:654 CMOVNZ `addr_b, `spill_b_addr_base +Xx_5459: MOV R10, [RSP+-0x3b0] # gcd_unsigned:664 MOV `a_head_start, `spill_a_end_index +Xx_5460: LEA R10, [RAX+R10*8] # update_end_index:48 LEA `end_index, [`addr_base+`end_index*8] +Xx_5461: XOR R15, R15 # update_end_index:50 XOR `tmp_0, `tmp_0 +Xx_5462: MOV RCX, 8 # update_end_index:51 MOV `tmp_8, 8 +Xx_5463: MOV R14, [R10] # update_end_index:63 MOV `tmp_value, [`end_index] +Xx_5464: CMP R14, R15 # update_end_index:67 CMP `tmp_value, `tmp_0 +Xx_5465: MOV R14, R15 # update_end_index:68 MOV `tmp_value, `tmp_0 +Xx_5466: CMOVE R14, RCX # update_end_index:69 CMOVE `tmp_value, `tmp_8 +Xx_5467: CMP R10, RAX # update_end_index:73 CMP `end_index, `addr_base +Xx_5468: CMOVE R14, R15 # update_end_index:74 CMOVE `tmp_value, `tmp_0 +Xx_5469: SUB R10, R14 # update_end_index:78 SUB `end_index, `tmp_value +Xx_5470: _label_47: # update_end_index:60 _label_47: +Xx_5471: MOV R14, [R10] # 
update_end_index:63 MOV `tmp_value, [`end_index] +Xx_5472: CMP R14, R15 # update_end_index:67 CMP `tmp_value, `tmp_0 +Xx_5473: MOV R14, R15 # update_end_index:68 MOV `tmp_value, `tmp_0 +Xx_5474: CMOVE R14, RCX # update_end_index:69 CMOVE `tmp_value, `tmp_8 +Xx_5475: CMP R10, RAX # update_end_index:73 CMP `end_index, `addr_base +Xx_5476: CMOVE R14, R15 # update_end_index:74 CMOVE `tmp_value, `tmp_0 +Xx_5477: SUB R10, R14 # update_end_index:78 SUB `end_index, `tmp_value +Xx_5478: CMP R14, R15 # update_end_index:82 CMP `tmp_value, `tmp_0 +Xx_5479: JNE _label_47 # update_end_index:83 JNE _label_47 +Xx_5480: SUB R10, RAX # update_end_index:88 SUB `end_index, `addr_base +Xx_5481: SHR R10, 3 # update_end_index:89 SHR `end_index, 3 +Xx_5482: MOV [RSP+-0x3b0], R10 # gcd_unsigned:666 MOV `spill_a_end_index, `a_head_start +Xx_5483: XOR RBP, RBP # gcd_unsigned:670 XOR `is_lehmer, `is_lehmer +Xx_5484: CMP R10, 2 # gcd_unsigned:671 CMP `a_head_start, 2 +Xx_5485: SETAE BPL # gcd_unsigned:672 SETAE `is_lehmer_8 +Xx_5486: MOV [RSP+-0x380], RBP # gcd_unsigned:673 MOV `spill_is_lehmer, `is_lehmer +Xx_5487: XOR R14, R14 # calculate_head_start:103 XOR `tmp, `tmp +Xx_5488: SUB R10, 2 # calculate_head_start:104 SUB `end_index, 2 +Xx_5489: CMOVB R10, R14 # calculate_head_start:105 CMOVB `end_index, `tmp +Xx_5490: LEA R14, [RAX+R10*8+0x0] # extract_head_at:122 LEA `tmp_addr, [`addr_base+`head_start*8+0x0] +Xx_5491: MOV RSI, [R14] # extract_head_at:123 MOV `res_0, [`tmp_addr] +Xx_5492: MOV RDI, [R14+8] # extract_head_at:124 MOV `res_1, [`tmp_addr+8] +Xx_5493: MOV R13, [R14+16] # extract_head_at:125 MOV `res_2, [`tmp_addr+16] +Xx_5494: BSR R11, R13 # calculate_shift_amount:308 BSR `res, `limbs_2 +Xx_5495: INC R11 # calculate_shift_amount:309 INC `res +Xx_5496: XOR R14, R14 # calculate_shift_amount:313 XOR `tmp, `tmp +Xx_5497: CMP R13, R14 # calculate_shift_amount:314 CMP `limbs_2, `tmp +Xx_5498: CMOVE R11, R14 # calculate_shift_amount:315 CMOVE `res, `tmp +Xx_5499: MOV RCX, R11 # 
shift_right:330 MOV RCX, `amount +Xx_5500: SHRD RSI, RDI, CL # shift_right:334 SHRD `limbs_0, `limbs_1, CL +Xx_5501: SHRD RDI, R13, CL # shift_right:338 SHRD `limbs_1, `limbs_2, CL +Xx_5502: CMP R11, 64 # shift_right:340 CMP `amount, 64 +Xx_5503: CMOVE RSI, RDI # shift_right:341 CMOVE `limbs_0, `limbs_1 +Xx_5504: CMOVE RDI, R13 # shift_right:342 CMOVE `limbs_1, `limbs_2 +Xx_5505: LEA R14, [RDX+R10*8+0x0] # extract_head_at:122 LEA `tmp_addr, [`addr_base+`head_start*8+0x0] +Xx_5506: MOV R8, [R14] # extract_head_at:123 MOV `res_0, [`tmp_addr] +Xx_5507: MOV R9, [R14+8] # extract_head_at:124 MOV `res_1, [`tmp_addr+8] +Xx_5508: MOV R12, [R14+16] # extract_head_at:125 MOV `res_2, [`tmp_addr+16] +Xx_5509: MOV RCX, R11 # shift_right:330 MOV RCX, `amount +Xx_5510: SHRD R8, R9, CL # shift_right:334 SHRD `limbs_0, `limbs_1, CL +Xx_5511: SHRD R9, R12, CL # shift_right:338 SHRD `limbs_1, `limbs_2, CL +Xx_5512: CMP R11, 64 # shift_right:340 CMP `amount, 64 +Xx_5513: CMOVE R8, R9 # shift_right:341 CMOVE `limbs_0, `limbs_1 +Xx_5514: CMOVE R9, R12 # shift_right:342 CMOVE `limbs_1, `limbs_2 +Xx_5515: MOV [RSP+-0x378], RSI # gcd_unsigned:684 MOV `spill_a_128, `a_head_0 +Xx_5516: MOV [RSP+-0x370], RDI # gcd_unsigned:685 MOV `spill_a_128_8, `a_head_1 +Xx_5517: MOV [RSP+-0x368], R8 # gcd_unsigned:687 MOV `spill_b_128, `b_head_0 +Xx_5518: MOV [RSP+-0x360], R9 # gcd_unsigned:688 MOV `spill_b_128_8, `b_head_1 +Xx_5519: MOV RAX, [RSP+-0x3d8] # gcd_unsigned:707 MOV `addr_threshold, `spill_threshold_addr_base +Xx_5520: LEA R15, [RAX+R10*8+0x0] # extract_head_at:122 LEA `tmp_addr, [`addr_base+`head_start*8+0x0] +Xx_5521: MOV RDX, [R15] # extract_head_at:123 MOV `res_0, [`tmp_addr] +Xx_5522: MOV R13, [R15+8] # extract_head_at:124 MOV `res_1, [`tmp_addr+8] +Xx_5523: MOV R14, [R15+16] # extract_head_at:125 MOV `res_2, [`tmp_addr+16] +Xx_5524: MOV RCX, R11 # shift_right:330 MOV RCX, `amount +Xx_5525: SHRD RDX, R13, CL # shift_right:334 SHRD `limbs_0, `limbs_1, CL +Xx_5526: SHRD R13, R14, CL # 
shift_right:338 SHRD `limbs_1, `limbs_2, CL +Xx_5527: CMP R11, 64 # shift_right:340 CMP `amount, 64 +Xx_5528: CMOVE RDX, R13 # shift_right:341 CMOVE `limbs_0, `limbs_1 +Xx_5529: CMOVE R13, R14 # shift_right:342 CMOVE `limbs_1, `limbs_2 +Xx_5530: MOV [RSP+-0x358], RDX # gcd_unsigned:716 MOV `spill_threshold_128, `threshold_head_0 +Xx_5531: MOV [RSP+-0x350], R13 # gcd_unsigned:717 MOV `spill_threshold_128_8, `threshold_head_1 +Xx_5532: MOV RAX, RDX # gcd_unsigned:720 MOV `addr_threshold, `threshold_head_0 +Xx_5533: MOV R14, R13 # gcd_unsigned:721 MOV `threshold_head_2, `threshold_head_1 +Xx_5534: SUB RAX, RSI # gcd_unsigned:722 SUB `addr_threshold, `a_head_0 +Xx_5535: SBB R14, RDI # gcd_unsigned:723 SBB `threshold_head_2, `a_head_1 +Xx_5536: JNC label_error_2 # gcd_unsigned:724 JNC label_error_2 +Xx_5537: XOR R12, R12 # gcd_unsigned:727 XOR `exit_flag, `exit_flag +Xx_5538: SUB RDX, R8 # gcd_unsigned:728 SUB `threshold_head_0, `b_head_0 +Xx_5539: SBB R13, R9 # gcd_unsigned:729 SBB `threshold_head_1, `b_head_1 +Xx_5540: SETNC R12B # gcd_unsigned:730 SETNC `exit_flag_8 +Xx_5541: OR RDX, R13 # gcd_unsigned:733 OR `threshold_head_0, `threshold_head_1 +Xx_5542: DEC RBP # gcd_unsigned:734 DEC `is_lehmer +Xx_5543: OR RDX, RBP # gcd_unsigned:735 OR `threshold_head_0, `is_lehmer +Xx_5544: JZ label_error_3 # gcd_unsigned:736 JZ label_error_3 +Xx_5545: MOV R13, RBX # gcd_unsigned:751 MOV `out_uv_addr, `iter +Xx_5546: SHL R13, 6 # gcd_unsigned:752 SHL `out_uv_addr, 6 +Xx_5547: ADD R13, [RSP+-0x3c0] # gcd_unsigned:753 ADD `out_uv_addr, `spill_out_uv_addr +Xx_5548: MOV R14, [RSP+-0x3a8] # gcd_unsigned:755 MOV `tmp, `spill_u_0 +Xx_5549: MOV [R13], R14 # gcd_unsigned:756 MOV [`out_uv_addr], `tmp +Xx_5550: MOV R14, [RSP+-0x3a0] # gcd_unsigned:758 MOV `tmp, `spill_u_1 +Xx_5551: MOV [R13+8], R14 # gcd_unsigned:759 MOV [`out_uv_addr+8], `tmp +Xx_5552: MOV R14, [RSP+-0x398] # gcd_unsigned:761 MOV `tmp, `spill_v_0 +Xx_5553: MOV [R13+16], R14 # gcd_unsigned:762 MOV [`out_uv_addr+16], `tmp 
+Xx_5554: MOV R14, [RSP+-0x390] # gcd_unsigned:764 MOV `tmp, `spill_v_1 +Xx_5555: MOV [R13+24], R14 # gcd_unsigned:765 MOV [`out_uv_addr+24], `tmp +Xx_5556: MOV R14, [RSP+-0x388] # gcd_unsigned:767 MOV `tmp, `spill_parity +Xx_5557: MOV [R13+32], R14 # gcd_unsigned:768 MOV [`out_uv_addr+32], `tmp +Xx_5558: MOV [R13+40], R12 # gcd_unsigned:770 MOV [`out_uv_addr+40], `exit_flag +Xx_5559: MOV R14, [RSP+-0x3d0] # gcd_unsigned:775 MOV `tmp, `spill_uv_counter_start +Xx_5560: ADD R14, RBX # gcd_unsigned:776 ADD `tmp, `iter +Xx_5561: MOV R13, [RSP+-0x3c8] # gcd_unsigned:777 MOV `out_uv_addr, `spill_out_uv_counter_addr +Xx_5562: MOV [R13], R14 # gcd_unsigned:778 MOV [`out_uv_addr], `tmp +Xx_5563: INC RBX # gcd_unsigned:780 INC `iter +Xx_5564: MOV [RSP+-0x3b8], RBX # gcd_unsigned:781 MOV `spill_iter, `iter +Xx_5565: CMP R12, 0 # gcd_unsigned:783 CMP `exit_flag, 0 +Xx_5566: JNE _label_33 # gcd_unsigned:784 JNE _label_33 +Xx_5567: CMP RBX, 0x28 # gcd_unsigned:786 CMP `iter, 0x28 +Xx_5568: JGE label_error_4 # gcd_unsigned:787 JGE label_error_4 +Xx_5569: JMP _label_32 # gcd_unsigned:790 JMP _label_32 +Xx_5570: _label_33: # gcd_unsigned:792 _label_33: +Xx_5571: MOV RBP, [RSP+-0x400] # compile_asm_gcd_unsigned:228 MOV `data_addr, `spill_data_addr +Xx_5572: MOV RBX, [RSP+-0x3f8] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3f8] +Xx_5573: MOV [RBP+0x0], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x0], `tmp +Xx_5574: MOV RBX, [RSP+-0x3f0] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3f0] +Xx_5575: MOV [RBP+0x8], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x8], `tmp +Xx_5576: MOV RBX, [RSP+-0x3e8] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3e8] +Xx_5577: MOV [RBP+0x10], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x10], `tmp +Xx_5578: MOV RBX, [RSP+-0x3e0] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3e0] +Xx_5579: MOV [RBP+0x18], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x18], `tmp +Xx_5580: MOV RBX, [RSP+-0x3d8] # 
compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3d8] +Xx_5581: MOV [RBP+0x20], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x20], `tmp +Xx_5582: MOV RBX, [RSP+-0x3d0] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3d0] +Xx_5583: MOV [RBP+0x28], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x28], `tmp +Xx_5584: MOV RBX, [RSP+-0x3c8] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3c8] +Xx_5585: MOV [RBP+0x30], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x30], `tmp +Xx_5586: MOV RBX, [RSP+-0x3c0] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3c0] +Xx_5587: MOV [RBP+0x38], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x38], `tmp +Xx_5588: MOV RBX, [RSP+-0x3b8] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3b8] +Xx_5589: MOV [RBP+0x40], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x40], `tmp +Xx_5590: MOV RBX, [RSP+-0x3b0] # compile_asm_gcd_unsigned:231 MOV `tmp, [RSP+-0x3b0] +Xx_5591: MOV [RBP+0x48], RBX # compile_asm_gcd_unsigned:232 MOV [`data_addr+0x48], `tmp +Xx_5592: MOV RAX, 0 # ~asm_function:107 MOV RAX, 0 +Xx_5593: _label_48: # ~asm_function:110 _label_48: +Xx_5594: MOV RSP, [RSP] # ~asm_function:112 MOV RSP, [RSP] +Xx_5595: POP R15 # ~asm_function:114 POP R15 +Xx_5596: POP R14 # ~asm_function:114 POP R14 +Xx_5597: POP R13 # ~asm_function:114 POP R13 +Xx_5598: POP R12 # ~asm_function:114 POP R12 +Xx_5599: POP RBX # ~asm_function:114 POP RBX +Xx_5600: POP RBP # ~asm_function:114 POP RBP +Xx_5601: RET # ~asm_function:116 RET +Xx_5602: label_error_1: # ~asm_function:119 label_error_1: +Xx_5603: MOV RAX, 0x1 # ~asm_function:122 MOV RAX, 0x1 +Xx_5604: JMP _label_48 # ~asm_function:123 JMP _label_48 +Xx_5605: label_error_2: # ~asm_function:119 label_error_2: +Xx_5606: MOV RAX, 0x2 # ~asm_function:122 MOV RAX, 0x2 +Xx_5607: JMP _label_48 # ~asm_function:123 JMP _label_48 +Xx_5608: label_error_3: # ~asm_function:119 label_error_3: +Xx_5609: MOV RAX, 0x3 # ~asm_function:122 MOV RAX, 0x3 +Xx_5610: JMP _label_48 # 
~asm_function:123 JMP _label_48 +Xx_5611: label_error_4: # ~asm_function:119 label_error_4: +Xx_5612: MOV RAX, 0x4 # ~asm_function:122 MOV RAX, 0x4 +Xx_5613: JMP _label_48 # ~asm_function:123 JMP _label_48 diff --git a/lib/chiavdf/fast_vdf/asm_gcd_128.h b/lib/chiavdf/fast_vdf/asm_gcd_128.h new file mode 100644 index 00000000..705ad18f --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_gcd_128.h @@ -0,0 +1,670 @@ +namespace asm_code { + + +typedef array reg_scalar_128; + +//v[0] is low, v[1] is high. amount is >=0 and <128. res can't alias with v +//preserves inputs. returns low part of result +//regs: RCX, 1x scalar +void shift_right( + reg_alloc regs, array v, reg_scalar amount, reg_scalar res, + reg_scalar tmp_rcx, reg_scalar tmp_res_2 +) { + EXPAND_MACROS_SCOPE; + + m.bind(v, "v"); + m.bind(amount, "amount"); + m.bind(res, "res"); + + assert(tmp_rcx.value==reg_rcx.value); + m.bind(tmp_res_2, "res_2"); + + //res=uint64([v[1]:v[0]] >> amount) ; undefined if amount>=64 + APPEND_M(str( "MOV RCX, `amount" )); + APPEND_M(str( "MOV `res, `v_0" )); + APPEND_M(str( "SHRD `res, `v_1, CL" )); + + //res_2=0 + APPEND_M(str( "XOR `res_2, `res_2" )); + + //RCX=amount-64 + APPEND_M(str( "SUB RCX, 64" )); + + //res=(amount>=64)? 0 : res + //res_2=(amount>=64)? v[1] : 0 + APPEND_M(str( "CMOVAE `res, `res_2" )); + APPEND_M(str( "CMOVAE `res_2, `v_1" )); + + //res_2=(amount>=64)? 0 : v[1]>>(amount-64) + APPEND_M(str( "SHR `res_2, CL" )); + + //res=(amount>=64)? 
res_2 : res + APPEND_M(str( "OR `res, `res_2" )); +} + +//all inputs are unsigned +void dot_product_exact(reg_alloc regs, array a, array b, reg_scalar out, string overflow_label) { + EXPAND_MACROS_SCOPE; + m.bind(a, "a"); + m.bind(b, "b"); + m.bind(out, "out"); + + reg_scalar rax=regs.bind_scalar(m, "rax", reg_rax); + reg_scalar rdx=regs.bind_scalar(m, "rdx", reg_rdx); + + //out=a0*b0 + APPEND_M(str( "MOV RAX, `a_0" )); + APPEND_M(str( "MUL `b_0" )); + APPEND_M(str( "JC #", overflow_label )); + APPEND_M(str( "MOV `out, RAX" )); + + //RAX=a1*b1 + APPEND_M(str( "MOV RAX, `a_1" )); + APPEND_M(str( "MUL `b_1" )); + APPEND_M(str( "JC #", overflow_label )); + + //out=a0*b0+a1*b1 + APPEND_M(str( "ADD `out, RAX" )); + APPEND_M(str( "JC #", overflow_label )); +} + +//ab and ab_threshold reg_spill are 16 bytes (lsb first), 8 byte aligned. all others are 8 bytes +//parity is 1 if odd, else 0 +//is_lehmer is 1 if true, else 0 +//u, v, and parity are outputs +//regs: 15x scalar, 16x vector (i.e. all of the registers except RSP) +void gcd_128( + reg_alloc regs_parent, + array spill_ab_start, array spill_u, array spill_v, + reg_spill spill_parity, reg_spill spill_is_lehmer, reg_spill spill_ab_threshold, + string no_progress_label +) { + EXPAND_MACROS_SCOPE_PUBLIC; + + track_asm( "gcd_128" ); + + m.bind(spill_ab_start[0], "spill_ab_start_0_0"); + m.bind(spill_ab_start[0]+8, "spill_ab_start_0_1"); + m.bind(spill_ab_start[1], "spill_ab_start_1_0"); + m.bind(spill_ab_start[1]+8, "spill_ab_start_1_1"); + + m.bind(spill_u, "spill_u"); + m.bind(spill_v, "spill_v"); + + m.bind(spill_parity, "spill_parity"); + m.bind(spill_is_lehmer, "spill_is_lehmer"); + m.bind(spill_ab_threshold, "spill_ab_threshold_0"); + m.bind(spill_ab_threshold+8, "spill_ab_threshold_1"); + + reg_vector vector_ab=regs_parent.bind_vector(m, "vector_ab"); + reg_vector vector_u=regs_parent.bind_vector(m, "vector_u"); + reg_vector vector_v=regs_parent.bind_vector(m, "vector_v"); + reg_vector 
vector_is_lehmer=regs_parent.bind_vector(m, "vector_is_lehmer"); + reg_vector vector_ab_threshold=regs_parent.bind_vector(m, "vector_ab_threshold"); + + reg_spill spill_iter=regs_parent.bind_spill(m, "spill_iter"); + + APPEND_M(str( "MOV QWORD PTR `spill_u_0, 1" )); + APPEND_M(str( "MOV QWORD PTR `spill_u_1, 0" )); + APPEND_M(str( "MOV QWORD PTR `spill_v_0, 0" )); + APPEND_M(str( "MOV QWORD PTR `spill_v_1, 1" )); + APPEND_M(str( "MOV QWORD PTR `spill_parity, 0" )); + APPEND_M(str( "MOV QWORD PTR `spill_iter, #", to_hex(gcd_128_max_iter) )); + + string start_label=m.alloc_label(); + string loop_label=m.alloc_label(); + string exit_label=m.alloc_label(); + string exit_iter_0_label=m.alloc_label(); + + string start_assign_label=m.alloc_label(); + APPEND_M(str( "JMP #", start_assign_label )); + + APPEND_M(str( "#:", loop_label )); + + track_asm( "gcd_128 iter" ); + + //4x scalar + reg_scalar new_u_0=regs_parent.bind_scalar(m, "new_u_0"); //a + reg_scalar new_u_1=regs_parent.bind_scalar(m, "new_u_1"); //b + reg_scalar new_v_0=regs_parent.bind_scalar(m, "new_v_0"); //ab_threshold + reg_scalar new_v_1=regs_parent.bind_scalar(m, "new_v_1"); //base iter + + if (use_divide_table) { + string base_exit_label=m.alloc_label(); + string base_loop_label=m.alloc_label(); + + APPEND_M(str( "MOV `new_v_1, #", to_hex(gcd_base_max_iter_divide_table) )); + + APPEND_M(str( "MOVDQA `vector_u, #", constant_address_uint64(1ull, 0ull) )); + APPEND_M(str( "MOVDQA `vector_v, #", constant_address_uint64(0ull, 1ull) )); + + APPEND_M(str( "#:", base_loop_label )); + + gcd_64_iteration(regs_parent, vector_is_lehmer, {new_u_0, new_u_1}, {vector_u, vector_v}, new_v_0, base_exit_label); + + APPEND_M(str( "DEC `new_v_1" )); + APPEND_M(str( "JNZ #", base_loop_label )); + + APPEND_M(str( "#:", base_exit_label )); + APPEND_M(str( "CMP `new_v_1, #", to_hex(gcd_base_max_iter_divide_table) )); + APPEND_M(str( "JE #", track_asm( "gcd_128 base no progress", exit_label ) )); + } else { + 
gcd_base_continued_fraction( + regs_parent, vector_ab, vector_u, vector_v, vector_is_lehmer, vector_ab_threshold, + track_asm( "gcd_128 base no progress", exit_label ) + ); + } + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //12x scalar (including dot product exact which is 2x scalar) + reg_scalar m_0_0=regs.bind_scalar(m, "m_0_0"); + reg_scalar m_0_1=regs.bind_scalar(m, "m_0_1"); + reg_scalar m_1_0=regs.bind_scalar(m, "m_1_0"); + reg_scalar m_1_1=regs.bind_scalar(m, "m_1_1"); + reg_scalar tmp_0=regs.bind_scalar(m, "tmp_0"); + reg_scalar tmp_1=regs.bind_scalar(m, "tmp_1"); + reg_vector tmp_a=regs.bind_vector(m, "tmp_a"); + reg_vector tmp_b=regs.bind_vector(m, "tmp_b"); + reg_vector tmp_c=regs.bind_vector(m, "tmp_c"); + reg_vector c_double_abs_mask=regs.bind_vector(m, "double_abs_mask"); + + if (!use_divide_table) { + APPEND_M(str( "MOVAPD `double_abs_mask, #", constant_address_uint64(double_abs_mask, double_abs_mask) )); + } + + auto abs_tmp_a=[&]() { + if (use_divide_table) { + //tmp_b = int64 mask = int64(v)>>63; + APPEND_M(str( "MOVDQA `tmp_b, `tmp_a" )); + APPEND_M(str( "PSRAD `tmp_b, 32" )); //high 32 bits = sign bit ; low 32 bits = undefined + APPEND_M(str( "PSHUFD `tmp_b, `tmp_b, #", to_hex( 0b11110101 ) )); //move high 32 bits to low 32 bits + + //abs_v=(v + mask) ^ mask; + APPEND_M(str( "PADDQ `tmp_a, `tmp_b" )); + APPEND_M(str( "PXOR `tmp_a, `tmp_b" )); + } else { + APPEND_M(str( "PAND `tmp_a, `double_abs_mask" )); + } + }; + + auto mov_low_tmp_a=[&](string target) { + if (use_divide_table) { + APPEND_M(str( "MOVQ `#, `tmp_a", target )); + } else { + APPEND_M(str( "CVTTSD2SI `#, `tmp_a", target )); + } + }; + + //= + //for the divide table, this is u[0] and v[0] + APPEND_M(str( "MOVAPD `tmp_a, `vector_u" )); + abs_tmp_a(); + mov_low_tmp_a( (use_divide_table)? "m_0_0" : "m_0_0" ); + APPEND_M(str( "SHUFPD `tmp_a, `tmp_a, 3" )); + mov_low_tmp_a( (use_divide_table)? 
"m_0_1" : "m_1_0" ); + + //= + //for the divide table, this is u[1] and v[1] + APPEND_M(str( "MOVAPD `tmp_a, `vector_v" )); + abs_tmp_a(); + mov_low_tmp_a( (use_divide_table)? "m_1_0" : "m_0_1" ); + APPEND_M(str( "SHUFPD `tmp_a, `tmp_a, 3" )); + mov_low_tmp_a( (use_divide_table)? "m_1_1" : "m_1_1" ); + + APPEND_M(str( "MOV `tmp_0, `spill_u_0" )); + APPEND_M(str( "MOV `tmp_1, `spill_u_1" )); + dot_product_exact(regs, {m_0_0, m_0_1}, {tmp_0, tmp_1}, new_u_0, track_asm( "gcd_128 uv overflow", exit_label )); + dot_product_exact(regs, {m_1_0, m_1_1}, {tmp_0, tmp_1}, new_u_1, track_asm( "gcd_128 uv overflow", exit_label )); + + APPEND_M(str( "MOV `tmp_0, `spill_v_0" )); + APPEND_M(str( "MOV `tmp_1, `spill_v_1" )); + dot_product_exact(regs, {m_0_0, m_0_1}, {tmp_0, tmp_1}, new_v_0, track_asm( "gcd_128 uv overflow", exit_label )); + dot_product_exact(regs, {m_1_0, m_1_1}, {tmp_0, tmp_1}, new_v_1, track_asm( "gcd_128 uv overflow", exit_label )); + } + + //9x scalar + reg_scalar new_ab_0_0=regs_parent.bind_scalar(m, "new_ab_0_0"); + reg_scalar new_ab_0_1=regs_parent.bind_scalar(m, "new_ab_0_1"); + reg_scalar new_ab_1_0=regs_parent.bind_scalar(m, "new_ab_1_0"); + reg_scalar new_ab_1_1=regs_parent.bind_scalar(m, "new_ab_1_1"); + reg_scalar new_parity=regs_parent.bind_scalar(m, "new_parity"); + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //15x scalar + reg_scalar rax=regs.bind_scalar(m, "rax", reg_rax); + reg_scalar rdx=regs.bind_scalar(m, "rdx", reg_rdx); + reg_vector tmp_a=regs.bind_vector(m, "tmp_a"); + + reg_scalar ab_start_0_0=regs.bind_scalar(m, "ab_start_0_0"); + reg_scalar ab_start_0_1=regs.bind_scalar(m, "ab_start_0_1"); + reg_scalar ab_start_1_0=regs.bind_scalar(m, "ab_start_1_0"); + reg_scalar ab_start_1_1=regs.bind_scalar(m, "ab_start_1_1"); + + APPEND_M(str( "MOV `ab_start_0_0, `spill_ab_start_0_0" )); + APPEND_M(str( "MOV `ab_start_0_1, `spill_ab_start_0_1" )); + APPEND_M(str( "MOV `ab_start_1_0, `spill_ab_start_1_0" )); + APPEND_M(str( "MOV 
`ab_start_1_1, `spill_ab_start_1_1" )); + + //RAX=(uv_double[1][1]<0)? 1 : 0=uv_double_parity + //(this also works for integers with the divide table) + APPEND_M(str( "MOVAPD `tmp_a, `vector_v" )); + APPEND_M(str( "SHUFPD `tmp_a, `tmp_a, 3" )); + APPEND_M(str( "MOVQ RAX, `tmp_a" )); + APPEND_M(str( "SHR RAX, 63" )); + + //new_parity=spill_parity^uv_double_parity + APPEND_M(str( "MOV `new_parity, `spill_parity" )); + APPEND_M(str( "XOR `new_parity, RAX" )); + + //[out1:out0]=[a1:a0]*u - [b1:b0]*v + auto dot_product_subtract=[&](string a0, string a1, string b0, string b1, string u, string v, string out0, string out1) { + //[RDX:RAX]=a0*u + APPEND_M(str( "MOV RAX, `#", a0 )); + APPEND_M(str( "MUL `#", u )); + + //[out1:out0]=a0*u + APPEND_M(str( "MOV `#, RAX", out0 )); + APPEND_M(str( "MOV `#, RDX", out1 )); + + //[RDX:RAX]=a1*u + APPEND_M(str( "MOV RAX, `#", a1 )); + APPEND_M(str( "MUL `#", u )); + + //[out1:out0]=a0*u + (a1*u)<<64=a*u + APPEND_M(str( "ADD `#, RAX", out1 )); + + //[RDX:RAX]=b0*v + APPEND_M(str( "MOV RAX, `#", b0 )); + APPEND_M(str( "MUL `#", v )); + + //[out1:out0]=a*u - b0*v + APPEND_M(str( "SUB `#, RAX", out0 )); + APPEND_M(str( "SBB `#, RDX", out1 )); + + //[RDX:RAX]=b1*v + APPEND_M(str( "MOV RAX, `#", b1 )); + APPEND_M(str( "MUL `#", v )); + + //[out1:out0]=a*u - b0*v - (b1*v)<<64=a*u - b*v + APPEND_M(str( "SUB `#, RAX", out1 )); + }; + + // uint64 uv_00=uv_uint64_new[0][0]; + // uint64 uv_01=uv_uint64_new[0][1]; + // int128 a_new_1=ab_start[0]; a_new_1*=uv_00; + // int128 a_new_2=ab_start[1]; a_new_2*=uv_01; + // if (uv_uint64_parity_new!=0) swap(a_new_1, a_new_2); + // int128 a_new_s=a_new_1-a_new_2; + // uint128 a_new(a_new_s); + dot_product_subtract( + "ab_start_0_0", "ab_start_0_1", + "ab_start_1_0", "ab_start_1_1", + "new_u_0", "new_v_0", + "new_ab_0_0", "new_ab_0_1" + ); + + // uint64 uv_10=uv_uint64_new[1][0]; + // uint64 uv_11=uv_uint64_new[1][1]; + // int128 b_new_1=ab_start[1]; b_new_1*=uv_11; + // int128 b_new_2=ab_start[0]; 
b_new_2*=uv_10; + // if (uv_uint64_parity_new!=0) swap(b_new_1, b_new_2); + // int128 b_new_s=b_new_1-b_new_2; + // uint128 b_new(b_new_s); + dot_product_subtract( + "ab_start_1_0", "ab_start_1_1", + "ab_start_0_0", "ab_start_0_1", + "new_v_1", "new_u_1", + "new_ab_1_0", "new_ab_1_1" + ); + + APPEND_M(str( "MOV RAX, -1" )); + APPEND_M(str( "ADD RAX, `new_parity" )); //rax=(new_parity==1)? 0 : ~0 + APPEND_M(str( "NOT RAX" )); //rax=(new_parity==1)? ~0 : 0 + + //if (new_parity!=0) { [out1:out0]=-[out1:out0]; } + auto conditional_negate=[&](string out0, string out1) { + //flip all bits if new_parity==1 + APPEND_M(str( "XOR `#, RAX", out0 )); + APPEND_M(str( "XOR `#, RAX", out1 )); + + //add 1 if new_parity==1 + APPEND_M(str( "ADD `#, `new_parity", out0 )); + APPEND_M(str( "ADC `#, 0", out1 )); + }; + + conditional_negate( "new_ab_0_0", "new_ab_0_1" ); + conditional_negate( "new_ab_1_0", "new_ab_1_1" ); + } + + //11x scalar: new_ab, new_u, new_v, new_parity, ab_threshold + reg_scalar ab_threshold_0=regs_parent.bind_scalar(m, "ab_threshold_0"); + reg_scalar ab_threshold_1=regs_parent.bind_scalar(m, "ab_threshold_1"); + + //flags for [a1:a0]-[b1:b0]: + //CMP a0,b0 ; sets CF if b0>a0. clears CF if b0==a0 + //SBB a1,b1 ; sets CF if b>a. sets ZF if b==a. 
may set ZF if b=b + //need to swap the order for <= and > + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //15x scalar + reg_scalar ab_delta_0=regs.bind_scalar(m, "ab_delta_0"); + reg_scalar ab_delta_1=regs.bind_scalar(m, "ab_delta_1"); + reg_scalar b_new_min=regs.bind_scalar(m, "b_new_min"); + reg_scalar is_lehmer=regs.bind_scalar(m, "is_lehmer"); + + APPEND_M(str( "MOV `is_lehmer, `spill_is_lehmer" )); + + //uint128 ab_delta=new_ab[0]-new_ab[1] + APPEND_M(str( "MOV `ab_delta_0, `new_ab_0_0" )); + APPEND_M(str( "MOV `ab_delta_1, `new_ab_0_1" )); + APPEND_M(str( "SUB `ab_delta_0, `new_ab_1_0" )); + APPEND_M(str( "SBB `ab_delta_1, `new_ab_1_1" )); + + // assert(a_new>=b_new); + // uint128 ab_delta=a_new-b_new; + // + // even: + // +uv_00 -uv_01 + // -uv_10 +uv_11 + // + // uint128 v_delta=uint128(v_1)+uint128(v_0); //even: positive. odd: negative + // uint128 u_delta=uint128(u_1)+uint128(u_0); //even: negative. odd: positive + // + // uv_10 is negative if even, positive if odd + // uv_11 is positive if even, negative if odd + // bool passed_even=(b_new>=uint128(u_1) && ab_delta>=v_delta); + // bool passed_odd=(b_new>=uint128(v_1) && ab_delta>=u_delta); + + //uint64 uv_delta_0=(even)? new_v_1 : new_u_1; + //uv_delta_0 stored in ab_threshold_0 + APPEND_M(str( "CMP `new_parity, 0" )); + APPEND_M(str( "MOV `ab_threshold_0, `new_u_1" )); + APPEND_M(str( "CMOVE `ab_threshold_0, `new_v_1" )); + + //uint64 uv_delta_1=(even)? new_v_0 : new_u_0; + //uv_delta_1 stored in ab_threshold_1 + APPEND_M(str( "MOV `ab_threshold_1, `new_u_0" )); + APPEND_M(str( "CMOVE `ab_threshold_1, `new_v_0" )); + + //uint64 b_new_min=(even)? 
new_u_1 : new_v_1; + APPEND_M(str( "MOV `b_new_min, `new_v_1" )); + APPEND_M(str( "CMOVE `b_new_min, `new_u_1" )); + + //if (!is_lehmer) uv_delta=0 + APPEND_M(str( "CMP `is_lehmer, 0" )); + APPEND_M(str( "CMOVE `ab_threshold_0, `is_lehmer" )); //if moved, is_lehmer==0 + APPEND_M(str( "CMOVE `ab_threshold_1, `is_lehmer" )); + + //if (!is_lehmer) b_new_min=0 + APPEND_M(str( "CMOVE `b_new_min, `is_lehmer" )); + + //[uv_delta_1:uv_delta_0]=uv_delta_0 + uv_delta_1 //v_delta if even, else u_delta + APPEND_M(str( "ADD `ab_threshold_0, `ab_threshold_1" )); + APPEND_M(str( "MOV `ab_threshold_1, 0" )); + APPEND_M(str( "ADC `ab_threshold_1, 0" )); + + //if (ab_delta=new_ab[0]) goto exit; + APPEND_M(str( "MOV `ab_delta_0, `ab_threshold_0" )); + APPEND_M(str( "MOV `ab_delta_1, `ab_threshold_1" )); + APPEND_M(str( "SUB `ab_delta_0, `new_ab_0_0" )); + APPEND_M(str( "SBB `ab_delta_1, `new_ab_0_1" )); + APPEND_M(str( "JNC #", track_asm( "gcd_128 went too far ab_threshold>=new_ab[0]", exit_label ) )); + + //u=new_u; + APPEND_M(str( "MOV `spill_u_0, `new_u_0" )); + APPEND_M(str( "MOV `spill_u_1, `new_u_1" )); + + //v=new_v; + APPEND_M(str( "MOV `spill_v_0, `new_v_0" )); + APPEND_M(str( "MOV `spill_v_1, `new_v_1" )); + + //parity=new_parity; + APPEND_M(str( "MOV `spill_parity, `new_parity" )); + + track_asm( "gcd_128 good iter" ); + + //--iter; + //if (iter==0) goto exit; + APPEND_M(str( "MOV `ab_delta_0, `spill_iter" )); + APPEND_M(str( "DEC `ab_delta_0" )); + APPEND_M(str( "MOV `spill_iter, `ab_delta_0" )); + APPEND_M(str( "JZ #", track_asm( "gcd_128 good exit", exit_iter_0_label ) )); + } + + APPEND_M(str( "#:", start_label )); + //11x scalar: new_ab, new_u, new_v, new_parity, ab_threshold + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //4x scalar + reg_scalar tmp_0=regs.bind_scalar(m, "tmp_0", reg_rax); + reg_scalar tmp_1=regs.bind_scalar(m, "tmp_1", reg_rdx); + reg_scalar tmp_2=regs.bind_scalar(m, "tmp_2"); + reg_scalar tmp_3=regs.bind_scalar(m, "tmp_3", reg_rcx); 
+ + reg_scalar ab_0_0=new_ab_0_0; + reg_scalar ab_0_1=new_ab_0_1; + reg_scalar ab_1_0=new_ab_1_0; + reg_scalar ab_1_1=new_ab_1_1; + + m.bind(new_ab_0_0, "ab_0_0"); + m.bind(new_ab_0_1, "ab_0_1"); + m.bind(new_ab_1_0, "ab_1_0"); + m.bind(new_ab_1_1, "ab_1_1"); + + m.bind(ab_threshold_0, "ab_threshold_0"); + m.bind(ab_threshold_1, "ab_threshold_1"); + + //tmp_3=0 + APPEND_M(str( "XOR `tmp_3, `tmp_3" )); + + //tmp=ab_1-ab_threshold + APPEND_M(str( "MOV `tmp_0, `ab_1_0" )); + APPEND_M(str( "MOV `tmp_1, `ab_1_1" )); + APPEND_M(str( "SUB `tmp_0, `ab_threshold_0" )); + APPEND_M(str( "SBB `tmp_1, `ab_threshold_1" )); + + //if (ab[1] : <0, 0> + APPEND_M(str( "OR `tmp_2, `tmp_1" )); + if (!use_divide_table) { +#ifdef CHIAOSX + APPEND_M(str( "LEA `tmp_3, [RIP+#]", constant_address_uint64(0ull, 0ull, false) )); + APPEND_M(str( "LEA `tmp_0, [RIP+#]", constant_address_uint64(~(0ull), ~(0ull), false) )); +#else + APPEND_M(str( "MOV `tmp_3, OFFSET FLAT:#", constant_address_uint64(0ull, 0ull, false) )); + APPEND_M(str( "MOV `tmp_0, OFFSET FLAT:#", constant_address_uint64(~(0ull), ~(0ull), false) )); +#endif + } else { +#ifdef CHIAOSX + APPEND_M(str( "LEA `tmp_3, [RIP+#]", constant_address_uint64(gcd_mask_exact[0], gcd_mask_exact[1], false) )); + APPEND_M(str( "LEA `tmp_0, [RIP+#]", constant_address_uint64(gcd_mask_approximate[0], gcd_mask_approximate[1], false) )); +#else + APPEND_M(str( "MOV `tmp_3, OFFSET FLAT:#", constant_address_uint64(gcd_mask_exact[0], gcd_mask_exact[1], false) )); + APPEND_M(str( "MOV `tmp_0, OFFSET FLAT:#", constant_address_uint64(gcd_mask_approximate[0], gcd_mask_approximate[1], false) )); +#endif + } + APPEND_M(str( "CMOVZ `tmp_0, `tmp_3" )); + APPEND_M(str( "MOVAPD `vector_is_lehmer, [`tmp_0]" )); + + //vector2 ab_double{ + // double(uint64(ab[0]>>shift_amount)), + // double(uint64(ab[1]>>shift_amount)) + //}; + //double ab_threshold_double(uint64(ab_threshold>>shift_amount)); + //if (shift_amount!=0) { + // ++ab_threshold_double; [can do this with 
integers because the shifted ab_threshold has to fit in a double exactly] + // a is larger than ab_threshold + //} + + //vector_ab=>shift_amount, undefined> + //also store integer in new_u_1 + shift_right(regs, {ab_1_0, ab_1_1}, tmp_1, new_u_1, tmp_3, tmp_2); + if (!use_divide_table) { + APPEND_M(str( "CVTSI2SD `vector_ab, `new_u_1" )); + } + + //vector_ab=>shift_amount, ab_1>>shift_amount> + if (!use_divide_table) { + APPEND_M(str( "SHUFPD `vector_ab, `vector_ab, 0" )); + } + + //vector_ab=>shift_amount, ab_1>>shift_amount> + //also store integer in new_u_1 + shift_right(regs, {ab_0_0, ab_0_1}, tmp_1, new_u_0, tmp_3, tmp_2); + if (!use_divide_table) { + APPEND_M(str( "CVTSI2SD `vector_ab, `new_u_0" )); + } + + //tmp_0=(ab_threshold>>shift_amount) + //also store integer in new_v_0 + shift_right(regs, {ab_threshold_0, ab_threshold_1}, tmp_1, new_v_0, tmp_3, tmp_2); + + //vector_ab_threshold= + if (!use_divide_table) { + APPEND_M(str( "CVTSI2SD `vector_ab_threshold, `new_v_0" )); + APPEND_M(str( "SHUFPD `vector_ab_threshold, `vector_ab_threshold, 0" )); + } + } + + APPEND_M(str( "JMP #", loop_label )); + + // + // + + APPEND_M(str( "#:", exit_label )); + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + reg_scalar tmp=regs.bind_scalar(m, "tmp"); + + //if (iter==gcd_128_max_iter) goto no_progress + APPEND_M(str( "MOV `tmp, `spill_iter" )); + APPEND_M(str( "CMP `tmp, #", to_hex(gcd_128_max_iter) )); + APPEND_M(str( "JE #", track_asm( "gcd_128 no progress", no_progress_label ) )); + } + APPEND_M(str( "JMP #", track_asm( "gcd_128 premature exit", exit_iter_0_label ) )); + + // + // + + APPEND_M(str( "#:", start_assign_label )); + + APPEND_M(str( "MOV `new_ab_0_0, `spill_ab_start_0_0" )); + APPEND_M(str( "MOV `new_ab_0_1, `spill_ab_start_0_1" )); + APPEND_M(str( "MOV `new_ab_1_0, `spill_ab_start_1_0" )); + APPEND_M(str( "MOV `new_ab_1_1, `spill_ab_start_1_1" )); + APPEND_M(str( "MOV `ab_threshold_0, `spill_ab_threshold_0" )); + APPEND_M(str( "MOV `ab_threshold_1, 
`spill_ab_threshold_1" )); + + APPEND_M(str( "JMP #", start_label )); + + // + // + + APPEND_M(str( "#:", exit_iter_0_label )); +} + + +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/asm_gcd_base_continued_fractions.h b/lib/chiavdf/fast_vdf/asm_gcd_base_continued_fractions.h new file mode 100644 index 00000000..b3a1b638 --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_gcd_base_continued_fractions.h @@ -0,0 +1,375 @@ +namespace asm_code { + + +const double range_check_range=double((1ull<<53)-1); +const uint64 double_sign_mask=(1ull<<63); +const uint64 double_abs_mask=~double_sign_mask; + +//clobbers v +void range_check( + reg_vector v, reg_vector range, reg_vector c_double_abs_mask, + string out_of_range_label +) { + EXPAND_MACROS_SCOPE; + + m.bind(range, "range"); + m.bind(c_double_abs_mask, "double_abs_mask"); + + m.bind(v, "tmp"); + + //tmp=abs(tmp) + APPEND_M(str( "ANDPD `tmp, `double_abs_mask" )); + + //tmp all 0s if (abs(tmp0)<=range && abs(tmp1)<=range) + APPEND_M(str( "CMPNLEPD `tmp, `range" )); + + //todo //can replace this with POR into an accumulator then use a single PTEST + //todo //can compile the code twice for is_leher being true and false, then branch to the appropriate version + //todo //can probably get rid of the uv range checks if is_lehmer is true + //todo //can get rid of the ab range checks if the table is used and each table uv value has a magnitude less than a certain amount + APPEND_M(str( "PTEST `tmp, `tmp" )); + APPEND_M(str( "JNZ #", out_of_range_label )); +} + +//clobbers b +//this calculates the dot product of each lane separately and puts the result in that lane +void dot_product_exact( + array a, array b, reg_vector v, reg_vector range, reg_vector c_double_abs_mask, + string out_of_range_label, bool result_always_in_range=false +) { + EXPAND_MACROS_SCOPE; + + m.bind(a, "a"); + m.bind(b, "b"); + m.bind(v, "v"); + + APPEND_M(str( "MULPD `b_0, `a_0" )); + APPEND_M(str( "MOVAPD `v, `b_0" )); + //todo //for avx, can get rid 
of a lot of the MOVs by using the 3-operand versions of the instructions + range_check(b[0], range, c_double_abs_mask, out_of_range_label); + + if (enable_all_instructions) { + APPEND_M(str( "VFMADD231PD `v, `b_1, `a_1" )); + } else { + APPEND_M(str( "MULPD `b_1, `a_1" )); + APPEND_M(str( "ADDPD `v, `b_1" )); + range_check(b[1], range, c_double_abs_mask, out_of_range_label); + } + + if (!result_always_in_range) { + APPEND_M(str( "MOVAPD `b_0, `v" )); + range_check(b[0], range, c_double_abs_mask, out_of_range_label); + } +} + +//ab_threshold is the same for both lanes +//is_lehmer is all 1s if lehmer, else all 0s +//will assign u and v +void gcd_base_continued_fraction( + reg_alloc regs, + reg_vector ab, reg_vector u, reg_vector v, reg_vector is_lehmer, reg_vector ab_threshold, + string no_progress_label +) { + EXPAND_MACROS_SCOPE; + + track_asm( "gcd_base" ); + + static double_table c_table=generate_table(gcd_table_num_exponent_bits, gcd_table_num_fraction_bits); + static bool outputted_table=false; + + if (!outputted_table) { +#ifdef CHIAOSX + APPEND_M(str( ".text " )); +#else + APPEND_M(str( ".text 1" )); +#endif + APPEND_M(str( ".balign 64" )); + APPEND_M(str( "gcd_base_table:" )); + + string table_data; + auto out_double=[&](double v) { + if (!table_data.empty()) { + table_data += ", "; + } + + table_data+=to_hex(*(uint64*)&v); + }; + + //each entry is 32 bytes, 32-aligned + for (continued_fraction c : c_table.data) { + matrix2 mat=c.get_matrix(); + out_double(mat[0][0]); //lane 0 + out_double(mat[1][0]); //lane 1 + out_double(mat[0][1]); //lane 0 + out_double(mat[1][1]); //lane 1 + + APPEND_M(str( ".quad #", table_data )); + table_data.clear(); + } + + APPEND_M(str( ".text" )); + + outputted_table=true; + } + + //5x vector + m.bind(ab, "ab"); + m.bind(u, "u"); + m.bind(v, "v"); + m.bind(is_lehmer, "is_lehmer"); + m.bind(ab_threshold, "ab_threshold"); + + //11x vector + reg_vector m_0=regs.bind_vector(m, "m_0"); + reg_vector m_1=regs.bind_vector(m, "m_1"); + 
reg_vector new_ab=regs.bind_vector(m, "new_ab"); + reg_vector new_ab_1=regs.bind_vector(m, "new_ab_1"); + reg_vector tmp=regs.bind_vector(m, "tmp"); + reg_vector tmp2=regs.bind_vector(m, "tmp2"); + reg_vector new_u=regs.bind_vector(m, "new_u"); + reg_vector new_v=regs.bind_vector(m, "new_v"); + reg_vector q=regs.bind_vector(m, "q"); + reg_vector c_range_check_range=regs.bind_vector(m, "range_check_range"); + reg_vector c_double_abs_mask=regs.bind_vector(m, "double_abs_mask"); + + reg_scalar q_scalar=regs.bind_scalar(m, "q_scalar"); + reg_scalar q_scalar_2=regs.bind_scalar(m, "q_scalar_2"); + reg_scalar q_scalar_3=regs.bind_scalar(m, "q_scalar_3"); + reg_scalar loop_counter=regs.bind_scalar(m, "loop_counter"); + + reg_scalar c_table_delta_minus_1=regs.bind_scalar(m, "c_table_delta_minus_1"); + APPEND_M(str( "MOV `c_table_delta_minus_1, #", constant_address_uint64(c_table.delta-1, c_table.delta-1) )); + + string exit_label=m.alloc_label(); + string loop_label=m.alloc_label(); + + APPEND_M(str( "MOV `loop_counter, #", to_hex(gcd_base_max_iter) )); + + APPEND_M(str( "MOVAPD `u, #", constant_address_double(1.0, 0.0) )); + APPEND_M(str( "MOVAPD `v, #", constant_address_double(0.0, 1.0) )); + APPEND_M(str( "MOVAPD `range_check_range, #", constant_address_double(range_check_range, range_check_range) )); + APPEND_M(str( "MOVAPD `double_abs_mask, #", constant_address_uint64(double_abs_mask, double_abs_mask) )); + + // q[0]=ab[0]/ab[1] + APPEND_M(str( "MOVAPD `tmp, `ab" )); + APPEND_M(str( "SHUFPD `tmp, `tmp, 3" )); // tmp= + APPEND_M(str( "MOVAPD `q, `ab" )); + APPEND_M(str( "DIVSD `q, `tmp" )); + + { + APPEND_M(str( "#:", loop_label )); + + track_asm( "gcd_base iter" ); + + string no_table_label=m.alloc_label(); + + APPEND_M( "#gcd_base loop start" ); + + //q_scalar=q_scalar_2=to_uint64(ab[0]/ab[1]) + APPEND_M(str( "MOVQ `q_scalar, `q" )); + APPEND_M(str( "MOV `q_scalar_2, `q_scalar" )); + APPEND_M(str( "MOV `q_scalar_3, `q_scalar" )); + + 
//q_scalar=(to_uint64(ab_0/ab_1)>>c_table.right_shift_amount)<<5 + assert(c_table.right_shift_amount>5); + APPEND_M(str( "SHR `q_scalar, #", to_hex(c_table.right_shift_amount-5) )); + APPEND_M(str( "AND `q_scalar, -32" )); + + // q_scalar-=c_table.range_start_shifted<<5 + // if (q_scalar<0 || q_scalar>=(c_table.range_end_shifted-c_table.range_start_shifted)<<5) goto no_table_label + //this bypasses the "ab[1]<=ab_threshold" check so we need to do it again in no_table_label + APPEND_M(str( "SUB `q_scalar, #", to_hex(c_table.range_start_shifted<<5) )); + APPEND_M(str( "JB #", track_asm( "gcd_base below table start", no_table_label ) )); + APPEND_M(str( "CMP `q_scalar, #", to_hex((c_table.range_end_shifted-c_table.range_start_shifted)<<5) )); + APPEND_M(str( "JAE #", track_asm( "gcd_base after table end", no_table_label ) )); + + //m_0: column 0 + //m_1: column 1 +#ifdef CHIAOSX + APPEND_M(str( "LEA RSI,[RIP+gcd_base_table]")); + APPEND_M(str( "MOVAPD `m_0, [`q_scalar+RSI]" )); + APPEND_M(str( "MOVAPD `m_1, [16+`q_scalar+RSI]" )); +#else + APPEND_M(str( "MOVAPD `m_0, [gcd_base_table+`q_scalar]" )); + APPEND_M(str( "MOVAPD `m_1, [gcd_base_table+16+`q_scalar]" )); +#endif + + //if (ab[1]<=ab_threshold) goto exit_label + //this also tests ab[0], which is >= ab[1] so this does nothing + APPEND_M(str( "MOVAPD `tmp, `ab" )); + APPEND_M(str( "CMPLEPD `tmp, `ab_threshold" )); // tmp all 0s if (ab[0]>ab_threshold[0] && ab[1]>ab_threshold[1]) + APPEND_M(str( "PTEST `tmp, `tmp" )); + APPEND_M(str( "JNZ #", track_asm( "gcd_base ab[1]<=ab_threshold", exit_label ) )); + + //if ( (q_scalar_2&(c_table.delta-1))==0 || (q_scalar_2&(c_table.delta-1))==c_table.delta-1 ) goto no_table_label + APPEND_M(str( "AND `q_scalar_2, `c_table_delta_minus_1" )); + APPEND_M(str( "JZ #", track_asm( "gcd_base on slot boundary", no_table_label ) )); + APPEND_M(str( "CMP `q_scalar_2, `c_table_delta_minus_1" )); + APPEND_M(str( "JE #", track_asm( "gcd_base on slot boundary", no_table_label ) )); + + 
//assigns: new_ab, new_ab_1, q, new_u, new_v + //reads: m, ab, u, v + //clobbers: tmp + auto calculate_using_m=[&](string fail_label) { + APPEND_M(str( "MOVAPD `tmp, `ab" )); + APPEND_M(str( "SHUFPD `tmp, `tmp, 0" )); + + APPEND_M(str( "MOVAPD `tmp2, `ab" )); + APPEND_M(str( "SHUFPD `tmp2, `tmp2, 3" )); + + dot_product_exact( + {m_0, m_1}, {tmp, tmp2}, new_ab, c_range_check_range, c_double_abs_mask, + track_asm( "gcd_base ab range check failed", fail_label), + true + ); + + APPEND_M(str( "MOVAPD `new_ab_1, `new_ab" )); + APPEND_M(str( "SHUFPD `new_ab_1, `new_ab_1, 3" )); // new_ab_1= + + // q[0]=new_ab[0]/new_ab[1] + // this clobbers q if the table is not used + APPEND_M(str( "MOVAPD `q, `new_ab" )); + APPEND_M(str( "DIVSD `q, `new_ab_1" )); + + APPEND_M(str( "MOVAPD `tmp, `u" )); + APPEND_M(str( "SHUFPD `tmp, `tmp, 0" )); + + APPEND_M(str( "MOVAPD `tmp2, `u" )); + APPEND_M(str( "SHUFPD `tmp2, `tmp2, 3" )); + + dot_product_exact( + {m_0, m_1}, {tmp, tmp2}, new_u, c_range_check_range, c_double_abs_mask, + track_asm( "gcd_base uv range check failed", fail_label) + ); + + //todo //for avx, can replace some shuffles with broadcasts. 
can make a macro that expands to the proper instructions + APPEND_M(str( "MOVAPD `tmp, `v" )); + APPEND_M(str( "SHUFPD `tmp, `tmp, 0" )); + + APPEND_M(str( "MOVAPD `tmp2, `v" )); + APPEND_M(str( "SHUFPD `tmp2, `tmp2, 3" )); + + dot_product_exact( + {m_0, m_1}, {tmp, tmp2}, new_v, c_range_check_range, c_double_abs_mask, + track_asm( "gcd_base uv range check failed", fail_label) + ); + }; + calculate_using_m(no_table_label); + + //if (new_ab[0]<=ab_threshold) goto no_table_label + APPEND_M(str( "UCOMISD `new_ab, `ab_threshold" )); + APPEND_M(str( "JBE #", track_asm( "gcd_base new_ab[0]<=ab_threshold for table", no_table_label ) )); + + string lehmer_label=m.alloc_label(); + APPEND_M(str( "JMP #", lehmer_label )); + + APPEND_M(str( "#:", no_table_label )); + APPEND_M( "#gcd_base no table" ); + { + track_asm( "gcd_base iter no table" ); + + //have to do this check here because it might have been skipped: if (ab[1]<=ab_threshold) goto exit_label + APPEND_M(str( "MOVAPD `tmp, `ab" )); + APPEND_M(str( "CMPLEPD `tmp, `ab_threshold" )); // tmp all 0s if (ab[0]>ab_threshold[0] && ab[1]>ab_threshold[1]) + APPEND_M(str( "PTEST `tmp, `tmp" )); + APPEND_M(str( "JNZ #", track_asm( "gcd_base ab[1]<=ab_threshold", exit_label ) )); + + //q is clobbered, so need to restore it + APPEND_M(str( "MOVQ `q, `q_scalar_3" )); + + // q=floor(q); + //this requires SSE4. 
if not present, can also add and subtract a magic number + APPEND_M(str( "ROUNDSD `q, `q, 1" )); //floor + + // m=[0 1] + // 1 -q] + // m_0=<0,1> [column 0] + // m_1=<1,-q> [column 1] + APPEND_M(str( "MOVAPD `m_0, #", constant_address_double(0.0, 1.0) )); + APPEND_M(str( "MOVAPD `m_1, `m_0" )); // m_1=<0,1> + APPEND_M(str( "SUBSD `m_1, `q" )); //m_1=<-q,1> + APPEND_M(str( "SHUFPD `m_1, `m_1, 1" )); //m_1=<1,-q> + + calculate_using_m(exit_label); + } + + APPEND_M(str( "#:", lehmer_label )); + APPEND_M( "#gcd_base end no table" ); + + // new_ab_0= + // new_ab_1= + // ab_delta=new_ab_0-new_ab_1 + + // new_uv_0= + // new_uv_1= + + //bool passed= + // new_ab_1[0]>=-new_uv_1[0] && ab_delta[0]+new_uv_0[0]>=new_uv_1[0] && + // new_ab_1[1]>=-new_uv_1[1] && ab_delta[1]+new_uv_0[1]>=new_uv_1[1] + //; + + //bool passed= + // new_ab_1[0]>=-new_uv_1[0] && ab_delta[0]+new_vu_0[0]>=new_vu_1[0] && + // new_ab_1[1]>=-new_uv_1[1] && ab_delta[1]+new_vu_0[1]>=new_vu_1[1] + //; + + //bool passed= + // new_ab[1]>=-new_u[1] && ab_delta[0]+new_v[0]>=new_v[1] && + // new_ab[1]>=-new_v[1] && ab_delta[0]+new_u[0]>=new_u[1] + //; + + //m_0=new_uv_0= + APPEND_M(str( "MOVAPD `m_0, `new_u" )); + APPEND_M(str( "SHUFPD `m_0, `new_v, 0" )); + + //m_1=new_uv_1= + APPEND_M(str( "MOVAPD `m_1, `new_u" )); + APPEND_M(str( "SHUFPD `m_1, `new_v, 3" )); + + //tmp=new_ab_0= + APPEND_M(str( "MOVAPD `tmp, `new_ab" )); + APPEND_M(str( "SHUFPD `tmp, `tmp, 0" )); + + //tmp=ab_delta=new_ab_0-new_ab_1 + APPEND_M(str( "SUBPD `tmp, `new_ab_1" )); + + //tmp=ab_delta+new_uv_0 + APPEND_M(str( "ADDPD `tmp, `m_0" )); + + //tmp all 0s if (ab_delta[0]+new_uv_0[0]>=new_uv_1[0] && ab_delta[1]+new_uv_0[1]>=new_uv_1[1]) + APPEND_M(str( "CMPLTPD `tmp, `m_1" )); + + //m_1=-new_uv_1 + APPEND_M(str( "XORPD `m_1, #", constant_address_uint64(double_sign_mask, double_sign_mask) )); + + //new_ab_1 all 0s if (new_ab_1[0]>=-new_uv_1[0] && new_ab_1[1]>=-new_uv_1[1]) + APPEND_M(str( "CMPLTPD `new_ab_1, `m_1" )); + + //if (is_lehmer && 
!(ab_delta[0]+new_uv_0[0]>=new_uv_1[0] && ab_delta[1]+new_uv_0[1]>=new_uv_1[1])) goto exit_label + //if (is_lehmer && !(new_ab_1[0]>=-new_uv_1[0] && new_ab_1[1]>=-new_uv_1[1])) goto exit_label + APPEND_M(str( "ORPD `tmp, `new_ab_1" )); //tmp all 0s if passed is true + APPEND_M(str( "ANDPD `tmp, `is_lehmer" )); //tmp all 0s if passed||(!is_lehmer) is true + APPEND_M(str( "PTEST `tmp, `tmp" )); + APPEND_M(str( "JNZ #", track_asm( "gcd_base lehmer failed", exit_label ) )); + + APPEND_M(str( "MOVAPD `ab, `new_ab" )); + APPEND_M(str( "MOVAPD `u, `new_u" )); + APPEND_M(str( "MOVAPD `v, `new_v" )); + track_asm( "gcd_base good iter" ); + + APPEND_M(str( "DEC `loop_counter" )); + APPEND_M(str( "JNZ #", loop_label )); + + APPEND_M( "#gcd_base loop end" ); + } + + track_asm( "gcd_base good exit" ); + + APPEND_M(str( "#:", exit_label )); + + APPEND_M(str( "CMP `loop_counter, #", to_hex(gcd_base_max_iter) )); + APPEND_M(str( "JE #", track_asm( "gcd_base no progress", no_progress_label ) )); +} + + +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/asm_gcd_base_divide_table.h b/lib/chiavdf/fast_vdf/asm_gcd_base_divide_table.h new file mode 100644 index 00000000..1d4c210d --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_gcd_base_divide_table.h @@ -0,0 +1,185 @@ +namespace asm_code { + + +//regs: 1x scalar (RAX) + 4x scalar arguments (r==RDX) +//todo //test hit rate +void divide_table(reg_alloc regs, reg_scalar a, reg_scalar b, reg_scalar q, reg_scalar r) { + EXPAND_MACROS_SCOPE; + + regs.get_scalar(reg_rax); + + m.bind(a, "a"); + m.bind(b, "b"); + m.bind(q, "q"); + assert(r.value==reg_rdx.value); + + static bool outputted_table=false; + + if (!outputted_table) { +#ifdef CHIAOSX + APPEND_M(str( ".text " )); +#else + APPEND_M(str( ".text 1" )); +#endif + APPEND_M(str( ".balign 64" )); + APPEND_M(str( "divide_table:" )); + + const int expected_size=1<=1); + + int num=0; + auto add=[&](uint64 v) { + APPEND_M(str( ".quad #", to_hex(v) )); + ++num; + }; + + add(0); + for (int 
index=1;index<=max_index;++index) { + uint128 v = (~uint128(0)) / uint128(index); + v>>=64; + add(v); + } + + assert(num==expected_size); + + APPEND_M(str( ".text" )); + + outputted_table=true; + } + + string b_shift_label=m.alloc_label(); + APPEND_M(str( "BSR `q, `b" )); // b_shift = bsr(b) + APPEND_M(str( "SUB `q, #", to_hex(divide_table_index_bits-1) )); // b_shift = bsr(b)-(divide_table_index_bits-1) + APPEND_M(str( "JNB #", b_shift_label )); + APPEND_M(str( "XOR `q, `q" )); // if (b_shift<0) b_shift=0 + APPEND_M(str( "#:", b_shift_label )); + + APPEND_M(str( "SARX RAX, `b, `q" )); // b_approx = b>>b_shift + APPEND_M(str( "MOV RAX, [divide_table+RAX*8]" )); // b_approx_inverse = divide_table[b_approx] + + APPEND_M(str( "IMUL `a" )); // q = (b_approx_inverse*a)>>64 + APPEND_M(str( "SARX `q, RDX, `q" )); // q = q>>b_shift + + string wrong_remainder_label=m.alloc_label(); + APPEND_M(str( "MOV RAX, `q" )); + APPEND_M(str( "IMUL RAX, `b" )); // r = q*b + APPEND_M(str( "JO #", wrong_remainder_label )); // overflow + APPEND_M(str( "MOV RDX, `a" )); + APPEND_M(str( "SUB RDX, RAX" )); // r = a-q*b + APPEND_M(str( "JO #", wrong_remainder_label )); // overflow + + APPEND_M(str( "CMP RDX, `b" )); + APPEND_M(str( "JAE #", wrong_remainder_label )); // !(r>=0 && r=0 + APPEND_M(str( "ADD RDX, `b" )); // r+=b + APPEND_M(str( "DEC `q" )); + } + + APPEND_M(str( "#:", end_label )); +} + +const array gcd_mask_approximate={1ull<<63, 1ull<<63}; +const array gcd_mask_exact={0, 0}; + +//regs: 3x scalar, 3x vector, 2x scalar argument, 2x vector argument +//uv[0] is: u[0], v[0]. 
int64 +//uv[1] is: u[1], v[1] +//c_gcd_mask is gcd_mask_approximate or gcd_mask_exact +//a is int64 +void gcd_64_iteration( + reg_alloc regs, reg_vector c_gcd_mask, array a, array uv, reg_scalar ab_threshold, + string early_exit_label +) { + EXPAND_MACROS_SCOPE; + + m.bind(c_gcd_mask, "c_gcd_mask"); + m.bind(a, "a"); + m.bind(uv, "uv"); + m.bind(ab_threshold, "ab_threshold"); + + reg_scalar q=regs.bind_scalar(m, "q"); + reg_scalar r=regs.bind_scalar(m, "r", reg_rdx); + + reg_scalar tmp_a=regs.bind_scalar(m, "tmp_a"); + + //new_uv_0 = uv[1] + reg_vector new_uv_1=regs.bind_vector(m, "new_uv_1"); + reg_vector tmp_1=regs.bind_vector(m, "tmp_1"); + reg_vector tmp_2=regs.bind_vector(m, "tmp_2"); + + APPEND_M(str( "CMP `a_1, `ab_threshold" )); + APPEND_M(str( "JBE #", early_exit_label )); + + divide_table(regs, a[0], a[1], q, r); + APPEND_M(str( "MOV `tmp_a, `q" )); + APPEND_M(str( "SHL `tmp_a, #", to_hex(63-gcd_num_quotient_bits) )); + APPEND_M(str( "SAR `tmp_a, #", to_hex(63-gcd_num_quotient_bits) )); + APPEND_M(str( "CMP `tmp_a, `q" )); + APPEND_M(str( "JNE #", early_exit_label )); //quotient is too big + + APPEND_M(str( "MOV `a_0, `a_1" )); + APPEND_M(str( "MOV `a_1, `r" )); + + APPEND_M(str( "VMOVQ `new_uv_1_128, `q" )); + APPEND_M(str( "VPBROADCASTQ `new_uv_1, `new_uv_1_128" )); // new_uv_1 = q + + APPEND_M(str( "VPMULDQ `new_uv_1, `new_uv_1, `uv_1" )); // new_uv_1 = q*uv[1] + APPEND_M(str( "VPSUBQ `new_uv_1, `uv_0, `new_uv_1" )); // new_uv_1 = uv[0] - q*uv[1] + + //overflow checking: + //-the carry_mask bits must be all 0s or all 1s for each 64-bit entry + //-if 1<=0 and tmp_2>=0 for both lanes) + //exact: ZF set always + APPEND_M(str( "VPTEST `tmp_1, `c_gcd_mask" )); + + APPEND_M(str( "JNZ #", early_exit_label )); //taken if ZF==0 + + //int64 delta=new_a[0]-new_a[1]; + //if (new_a[1]<-new_uv[1]) goto early_exit_label + //if (delta= ab_threshold + APPEND_M(str( "VMOVDQU `uv_1, `new_uv_1" )); +} + + +} \ No newline at end of file diff --git 
a/lib/chiavdf/fast_vdf/asm_gcd_unsigned.h b/lib/chiavdf/fast_vdf/asm_gcd_unsigned.h new file mode 100644 index 00000000..b36334a7 --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_gcd_unsigned.h @@ -0,0 +1,796 @@ +namespace asm_code { + + +struct asm_integer { + //if a sign limb exists, it is one qword before this address. the data limbs are after this address + reg_scalar addr_base; + + //the asm_integer functions only use addr_base. this is used to assign addr_base if it needs to be allocated + reg_spill addr_base_spill; + + int addr_offset=0; + + bool is_signed=false; + int size=0; //limbs. lsb limb is first. this is a multiple of 4 + + asm_integer() {} + asm_integer(reg_spill t_spill, int t_size) { + addr_base_spill=t_spill; + size=t_size; + } + + string operator[](int pos) { + assert(pos>=0 && pos= the number nonzero of limbs minus 1, but not more than size-1 + //if the integer is 0, end_index should initially be at least 0 and the returned end_index is 0 + //regs: 3x scalar + void update_end_index(reg_alloc regs, reg_scalar end_index) { + EXPAND_MACROS_SCOPE; + + assert(size%4==0); + assert(addr_offset==0); //can temporarily modify addr_base if this is false + + m.bind(end_index, "end_index"); + m.bind(addr_base, "addr_base"); + reg_scalar tmp_value=regs.bind_scalar(m, "tmp_value"); + reg_scalar tmp_0=regs.bind_scalar(m, "tmp_0"); + reg_scalar tmp_8=regs.bind_scalar(m, "tmp_8"); + + //convert index to address + APPEND_M(str( "LEA `end_index, [`addr_base+`end_index*8]" )); + + APPEND_M(str( "XOR `tmp_0, `tmp_0" )); + APPEND_M(str( "MOV `tmp_8, 8" )); + + string loop_label=m.alloc_label(); + + const int num_unroll=2; + assert(num_unroll>=1); + + for (int x=0;x= the actual nonzero size to avoid truncation + //regs: 1x scalar + void extract_head_at(reg_alloc regs, reg_scalar head_start, array res) { + EXPAND_MACROS_SCOPE; + + assert(size%4==0); + + m.bind(addr_base, "addr_base"); + m.bind(head_start, "head_start"); + m.bind(res, "res"); + + reg_scalar 
tmp_addr=regs.bind_scalar(m, "tmp_addr"); + + APPEND_M(str( "LEA `tmp_addr, [`addr_base+`head_start*8+#]", to_hex(addr_offset) )); + APPEND_M(str( "MOV `res_0, [`tmp_addr]" )); + APPEND_M(str( "MOV `res_1, [`tmp_addr+8]" )); + APPEND_M(str( "MOV `res_2, [`tmp_addr+16]" )); + } + + void mul_add_bmi( + reg_alloc regs, asm_integer a, reg_scalar b, asm_integer c, bool invert_output, bool carry_in_is_1 + ) { + EXPAND_MACROS_SCOPE; + + m.bind(b, "b"); + + //5x scalar + reg_scalar mul_low_0=regs.bind_scalar(m, "mul_low_0"); + reg_scalar mul_low_1=regs.bind_scalar(m, "mul_low_1"); + reg_scalar mul_high_0=regs.bind_scalar(m, "mul_high_0"); + reg_scalar mul_high_1=regs.bind_scalar(m, "mul_high_1"); + reg_scalar rdx=regs.bind_scalar(m, "rdx", reg_rdx); + + //clears OF and CF + APPEND_M(str( "XOR RDX, RDX" )); + + if (carry_in_is_1) { + APPEND_M(str( "STC" )); + } + + APPEND_M(str( "MOV RDX, `b" )); + + for (int pos=0;pos>64 + APPEND_M(str( "MULX `mul_high_0, `mul_low_0, #", a[pos] )); + + if (!first) { + APPEND_M(str( "ADOX `mul_low_0, `mul_high_1" )); + } + + APPEND_M(str( "MULX `mul_high_1, `mul_low_1, #", a[pos+1] )); + APPEND_M(str( "ADOX `mul_low_1, `mul_high_0" )); + + if (!c.is_null()) { + APPEND_M(str( "ADCX `mul_low_0, #", c[pos] )); + APPEND_M(str( "ADCX `mul_low_1, #", c[pos+1] )); + } + + if (invert_output) { + APPEND_M(str( "NOT `mul_low_0" )); + APPEND_M(str( "NOT `mul_low_1" )); + } + + APPEND_M(str( "MOV #, `mul_low_0", (*this)[pos] )); + APPEND_M(str( "MOV #, `mul_low_1", (*this)[pos+1] )); + } + } + + void mul_add_slow( + reg_alloc regs, asm_integer a, reg_scalar b, asm_integer c, bool invert_output, bool carry_in_is_1 + ) { + EXPAND_MACROS_SCOPE; + + m.bind(b, "b"); + + //11x scalar + reg_scalar mul_carry=regs.bind_scalar(m, "mul_carry"); + reg_scalar add_carry=regs.bind_scalar(m, "add_carry"); + reg_scalar mul_high_4_previous=regs.bind_scalar(m, "mul_high_4_previous"); + reg_scalar mul_low_0=regs.bind_scalar(m, "mul_low_0"); + reg_scalar 
mul_low_1=regs.bind_scalar(m, "mul_low_1"); + reg_scalar mul_low_2=regs.bind_scalar(m, "mul_low_2"); + reg_scalar mul_low_3=regs.bind_scalar(m, "mul_low_3", reg_rax); + reg_scalar mul_high_0=regs.bind_scalar(m, "mul_high_0"); + reg_scalar mul_high_1=regs.bind_scalar(m, "mul_high_1"); + reg_scalar mul_high_2=regs.bind_scalar(m, "mul_high_2"); + reg_scalar mul_high_3=regs.bind_scalar(m, "mul_high_3", reg_rdx); + + for (int pos=0;pos>64 + if (first) { + //mul_carry==0 ; mul_high_4_previous==0 + APPEND_M(str( "ADD `mul_low_1, `mul_high_0" )); + } else { + APPEND_M(str( "ADD `mul_carry, 1" )); // CF=(mul_carry==-1)? 1 : 0 + APPEND_M(str( "ADC `mul_low_0, `mul_high_4_previous" )); + APPEND_M(str( "ADC `mul_low_1, `mul_high_0" )); + } + + APPEND_M(str( "ADC `mul_low_2, `mul_high_1" )); + APPEND_M(str( "ADC `mul_low_3, `mul_high_2" )); + + if (!last) { + APPEND_M(str( "MOV `mul_high_4_previous, `mul_high_3" )); + APPEND_M(str( "SBB `mul_carry, `mul_carry" )); // mul_carry=(CF)? -1 : 0 + } + + if (!c.is_null()) { + //mul_low=mul_low+c + //output mul_low + + if (first) { + if (carry_in_is_1) { + APPEND_M(str( "STC" )); + APPEND_M(str( "ADC `mul_low_0, #", c[pos] )); + } else { + APPEND_M(str( "ADD `mul_low_0, #", c[pos] )); + } + } else { + APPEND_M(str( "ADD `add_carry, 1" )); // CF=(add_carry==-1)? 1 : 0 + APPEND_M(str( "ADC `mul_low_0, #", c[pos] )); + } + + for (int x=1;x<4;++x) { + APPEND_M(str( "ADC `mul_low_#, #", x, c[pos+x] )); + } + + if (!last) { + APPEND_M(str( "SBB `add_carry, `add_carry" )); // add_carry=(CF)? -1 : 0 + } + } + + for (int x=0;x<4;++x) { + if (invert_output) { + APPEND_M(str( "NOT `mul_low_#", x )); + } + APPEND_M(str( "MOV #, `mul_low_#", (*this)[pos+x], x )); + } + } + } + + // (*this)=a*b+c+(carry_in_is_1? 
1 : 0) + // if (invert_output) (*this)=~(*this) + //all of the integers must have the same size (which is a multiple of 4) + //a or c can alias with *this (as long as the aliasing is not partial) + //regs: 11x scalar + // + //to calculate a*b-c*d: + //-first calculate ~(c*d) + //-then calculate a*b+(~(c*d))+1 + void mul_add( + reg_alloc regs, asm_integer a, reg_scalar b, asm_integer c, bool invert_output, bool carry_in_is_1 + ) { + EXPAND_MACROS_SCOPE; + + assert(!carry_in_is_1 || !c.is_null()); + assert(size%4==0); + assert(size==a.size && (c.is_null() || size==c.size)); + + if (enable_all_instructions) { + mul_add_bmi(regs, a, b, c, invert_output, carry_in_is_1); + } else { + mul_add_slow(regs, a, b, c, invert_output, carry_in_is_1); + } + } +}; + +//sets res to the right shift amount required for the uppermost limb to be 0. this is between 0 and 64 inclusive +//regs: 1x scalar +void calculate_shift_amount(reg_alloc regs, array limbs, reg_scalar res) { + EXPAND_MACROS_SCOPE; + + m.bind(limbs, "limbs"); + m.bind(res, "res"); + + reg_scalar tmp=regs.bind_scalar(m, "tmp"); + + //res=[first set bit index in limbs_2]+1 + APPEND_M(str( "BSR `res, `limbs_2" )); + APPEND_M(str( "INC `res" )); + + //res=num bits of limbs_2 [which is also the right shift amount] + //(this is 0 if limbs_2 is 0) + APPEND_M(str( "XOR `tmp, `tmp" )); + APPEND_M(str( "CMP `limbs_2, `tmp" )); + APPEND_M(str( "CMOVE `res, `tmp" )); +} + +//amount must be >=0 and <=64 +//this only calculates the lower 2 limbs of the result +//regs: 1x scalar +//in-place +void shift_right(reg_alloc regs, array limbs, reg_scalar amount) { + EXPAND_MACROS_SCOPE; + + m.bind(limbs, "limbs"); + m.bind(amount, "amount"); + + regs.get_scalar(reg_rcx); + + APPEND_M(str( "MOV RCX, `amount" )); + + // if (amount<64) res[0]=[limbs[1]:limbs[0]]>>amount + // if (amount==64) no-op + APPEND_M(str( "SHRD `limbs_0, `limbs_1, CL" )); + + // if (amount<64) res[1]=[limbs[2]:limbs[1]]>>amount + // if (amount==64) no-op + APPEND_M(str( 
"SHRD `limbs_1, `limbs_2, CL" )); + + APPEND_M(str( "CMP `amount, 64" )); + APPEND_M(str( "CMOVE `limbs_0, `limbs_1" )); + APPEND_M(str( "CMOVE `limbs_1, `limbs_2" )); +} + +//this must be true: a>=b; a>=threshold +// +//all of the integers should have spilled addresses with offsets of 0. all of their sizes should be the same +//the input a and b values should go into spill_a and spill_b. spill_a_2 and spill_b_2 should be uninitialized +//spill_iter will be between -1 and max_iterations +//the final a value is in spill_a if spill_iter is odd, otherwise is is in a_2. same with b +// +//for each iteration, including iteration -1, the following will happen: +//-64 bytes of data is written to *(spill_out_uv_addr + iter*64) +//-then, *spill_uv_counter_addr is set to spill_uv_counter_start+iter +// +//the data has the following format: [u0] [u1] [v0] [v1] [parity] [exit_flag] +//-each entry is 8 bytes +//-if iter is -1, only exit_flag is initialized and the rest have undefined values +//-if exit_flag is 1, this is the final result +// +//no more than max_iterations+1 results will be outputted. there will be an error if there are more results than this +//(this includes iteration -1) +// +//spill_a_end_index must be < a's size and >= 0. any limbs past this must be 0 for a, b, and threshold, but only up to the next +// multiple of 4 limbs. (e.g. if spill_a_end_index is 6, there are 7 limbs so the 8th limb must be 0 and the rest can be uninitialized) +// +//the return value of iter is the total number of iterations performed, which is at least 0. 
iter-1 is the parity of the last iteration +void gcd_unsigned( + reg_alloc regs_parent, + asm_integer spill_a, asm_integer spill_b, asm_integer spill_a_2, asm_integer spill_b_2, asm_integer spill_threshold, + reg_spill spill_uv_counter_start, reg_spill spill_out_uv_counter_addr, reg_spill spill_out_uv_addr, + reg_spill spill_iter, reg_spill spill_a_end_index, int max_iterations +) { + EXPAND_MACROS_SCOPE_PUBLIC; + + track_asm( "gcd_unsigned" ); + + int int_size=spill_a.size; + assert(spill_a.addr_offset==0 && spill_b.addr_offset==0 && spill_threshold.addr_offset==0); + assert(spill_a.addr_base.value==-1 && spill_b.addr_base.value==-1 && spill_threshold.addr_base.value==-1); + assert(spill_a_2.addr_offset==0 && spill_b_2.addr_offset==0); + assert(spill_a_2.addr_base.value==-1 && spill_b_2.addr_base.value==-1); + assert(spill_a.size==int_size && spill_b.size==int_size && spill_threshold.size==int_size); + assert(spill_a_2.size==int_size && spill_b_2.size==int_size); + + m.bind(spill_a.addr_base_spill, "spill_a_addr_base"); + m.bind(spill_a_2.addr_base_spill, "spill_a_2_addr_base"); + + m.bind(spill_b.addr_base_spill, "spill_b_addr_base"); + m.bind(spill_b_2.addr_base_spill, "spill_b_2_addr_base"); + + m.bind(spill_threshold.addr_base_spill, "spill_threshold_addr_base"); + + m.bind(spill_iter, "spill_iter"); + m.bind(spill_uv_counter_start, "spill_uv_counter_start"); + m.bind(spill_out_uv_addr, "spill_out_uv_addr"); + m.bind(spill_out_uv_counter_addr, "spill_out_uv_counter_addr"); + m.bind(spill_a_end_index, "spill_a_end_index"); + + reg_spill spill_u_0=regs_parent.bind_spill(m, "spill_u_0"); + reg_spill spill_u_1=regs_parent.bind_spill(m, "spill_u_1"); + reg_spill spill_v_0=regs_parent.bind_spill(m, "spill_v_0"); + reg_spill spill_v_1=regs_parent.bind_spill(m, "spill_v_1"); + reg_spill spill_parity=regs_parent.bind_spill(m, "spill_parity"); + reg_spill spill_is_lehmer=regs_parent.bind_spill(m, "spill_is_lehmer"); + + reg_spill spill_a_128=regs_parent.bind_spill(m, 
"spill_a_128", 16, 8); + reg_spill spill_b_128=regs_parent.bind_spill(m, "spill_b_128", 16, 8); + reg_spill spill_threshold_128=regs_parent.bind_spill(m, "spill_threshold_128", 16, 8); + + m.bind(spill_a_128+8, "spill_a_128_8"); + m.bind(spill_b_128+8, "spill_b_128_8"); + m.bind(spill_threshold_128+8, "spill_threshold_128_8"); + + APPEND_M(str( "MOV QWORD PTR `spill_iter, -1" )); + + string loop_start=m.alloc_label(); + string loop=m.alloc_label(); + string loop_exit=m.alloc_label(); + + APPEND_M(str( "JMP #", loop_start )); + + APPEND_M(str( "#:", loop )); + + //iter even: old_a=a , old_b=b ; new_a=a_2, new_b=b_2 + //iter odd: old_a=a_2, old_b=b_2 ; new_a=a , new_b=b + + gcd_128( + regs_parent, + {spill_a_128, spill_b_128}, {spill_u_0, spill_u_1}, {spill_v_0, spill_v_1}, + spill_parity, spill_is_lehmer, spill_threshold_128, + track_asm( "gcd_unsigned error: gcd 128 stuck", m.alloc_error_label() ) + ); + + string exit_multiply_uv=m.alloc_label(); + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + reg_scalar tmp=regs.bind_scalar(m, "tmp"); + + string jump_table_label=m.alloc_label(); + +#ifdef CHIAOSX + APPEND_M(str( ".text " )); +#else + APPEND_M(str( ".text 1" )); +#endif + APPEND_M(str( ".balign 8" )); + APPEND_M(str( "#:", jump_table_label )); + +#ifdef CHIAOSX + APPEND_M(str( ".text" )); + + APPEND_M(str( "MOV `tmp, `spill_a_end_index" )); + + for (int end_index=0;end_index=2) + //(a_end_index is stored in a_head_start) + APPEND_M(str( "XOR `is_lehmer, `is_lehmer" )); + APPEND_M(str( "CMP `a_head_start, 2" )); + APPEND_M(str( "SETAE `is_lehmer_8" )); + APPEND_M(str( "MOV `spill_is_lehmer, `is_lehmer" )); + + a.calculate_head_start(regs, a_head_start); + + a.extract_head_at(regs, a_head_start, {a_head_0, a_head_1, a_head_2}); + calculate_shift_amount(regs, {a_head_0, a_head_1, a_head_2}, shift_right_amount); + shift_right(regs, {a_head_0, a_head_1, a_head_2}, shift_right_amount); + + b.extract_head_at(regs, a_head_start, {b_head_0, b_head_1, 
b_head_2}); + shift_right(regs, {b_head_0, b_head_1, b_head_2}, shift_right_amount); + + APPEND_M(str( "MOV `spill_a_128, `a_head_0" )); + APPEND_M(str( "MOV `spill_a_128_8, `a_head_1" )); + + APPEND_M(str( "MOV `spill_b_128, `b_head_0" )); + APPEND_M(str( "MOV `spill_b_128_8, `b_head_1" )); + } + + //9x + //iter, is_lehmer, b_head_0, b_head_1, a_head_start, shift_right_amount + reg_scalar exit_flag=regs_parent.bind_scalar(m, "exit_flag"); + + //clobbers is_lehmer + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //4x + 1x from called functions + reg_scalar addr_threshold=regs.bind_scalar(m, "addr_threshold", reg_rax); + reg_scalar threshold_head_0=regs.bind_scalar(m, "threshold_head_0", reg_rdx); + reg_scalar threshold_head_1=regs.bind_scalar(m, "threshold_head_1"); + reg_scalar threshold_head_2=regs.bind_scalar(m, "threshold_head_2"); + + //addr_threshold=&threshold + APPEND_M(str( "MOV `addr_threshold, `spill_threshold_addr_base" )); + + asm_integer threshold; + threshold.size=int_size; + threshold.addr_base=addr_threshold; + + threshold.extract_head_at(regs, a_head_start, {threshold_head_0, threshold_head_1, threshold_head_2}); + shift_right(regs, {threshold_head_0, threshold_head_1, threshold_head_2}, shift_right_amount); + + APPEND_M(str( "MOV `spill_threshold_128, `threshold_head_0" )); + APPEND_M(str( "MOV `spill_threshold_128_8, `threshold_head_1" )); + + //if (a_head<=threshold_head) goto error + APPEND_M(str( "MOV `addr_threshold, `threshold_head_0" )); + APPEND_M(str( "MOV `threshold_head_2, `threshold_head_1" )); + APPEND_M(str( "SUB `addr_threshold, `a_head_0" )); + APPEND_M(str( "SBB `threshold_head_2, `a_head_1" )); + APPEND_M(str( "JNC #", track_asm( "gcd_unsigned error: a_head<=threshold_head", m.alloc_error_label() ) )); + + //threshold_head' = threshold_head-b_head + APPEND_M(str( "XOR `exit_flag, `exit_flag" )); + APPEND_M(str( "SUB `threshold_head_0, `b_head_0" )); + APPEND_M(str( "SBB `threshold_head_1, `b_head_1" )); + 
APPEND_M(str( "SETNC `exit_flag_8" )); //exit_flag = (threshold_head>=b_head) + + //if (b_head==threshold_head && is_lehmer) goto error + APPEND_M(str( "OR `threshold_head_0, `threshold_head_1" )); + APPEND_M(str( "DEC `is_lehmer" )); // is_lehmer'=(is_lehmer)? 0 : ~0 + APPEND_M(str( "OR `threshold_head_0, `is_lehmer" )); //ZF = (threshold_head'==0 && is_lehmer) + APPEND_M(str( "JZ #", track_asm( "gcd_unsigned error: b_head==threshold_head and is_lehmer", m.alloc_error_label() ) )); + } + + //9x + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + //2x + reg_scalar out_uv_addr=regs.bind_scalar(m, "out_uv_addr"); + reg_scalar tmp=regs.bind_scalar(m, "tmp"); + + //out_uv_addr = spill_out_uv_addr + iter*64 + //note: iter can be -1 + APPEND_M(str( "MOV `out_uv_addr, `iter" )); + APPEND_M(str( "SHL `out_uv_addr, 6" )); + APPEND_M(str( "ADD `out_uv_addr, `spill_out_uv_addr" )); + + APPEND_M(str( "MOV `tmp, `spill_u_0" )); + APPEND_M(str( "MOV [`out_uv_addr], `tmp" )); + + APPEND_M(str( "MOV `tmp, `spill_u_1" )); + APPEND_M(str( "MOV [`out_uv_addr+8], `tmp" )); + + APPEND_M(str( "MOV `tmp, `spill_v_0" )); + APPEND_M(str( "MOV [`out_uv_addr+16], `tmp" )); + + APPEND_M(str( "MOV `tmp, `spill_v_1" )); + APPEND_M(str( "MOV [`out_uv_addr+24], `tmp" )); + + APPEND_M(str( "MOV `tmp, `spill_parity" )); + APPEND_M(str( "MOV [`out_uv_addr+32], `tmp" )); + + APPEND_M(str( "MOV [`out_uv_addr+40], `exit_flag" )); + + //done assigning the data; can now increment the counter. 
this is not atomic because only this thread can write to the counter + //(the counter must be 8-aligned) + //x86 uses acq_rel ordering on all of the loads and stores so no fences are required + APPEND_M(str( "MOV `tmp, `spill_uv_counter_start" )); + APPEND_M(str( "ADD `tmp, `iter" )); + APPEND_M(str( "MOV `out_uv_addr, `spill_out_uv_counter_addr" )); + APPEND_M(str( "MOV [`out_uv_addr], `tmp" )); + + APPEND_M(str( "INC `iter" )); + APPEND_M(str( "MOV `spill_iter, `iter" )); + + APPEND_M(str( "CMP `exit_flag, 0" )); + APPEND_M(str( "JNE #", loop_exit )); + + APPEND_M(str( "CMP `iter, #", to_hex(max_iterations) )); //signed + APPEND_M(str( "JGE #", track_asm( "gcd_unsigned error: max_iterations exceeded", m.alloc_error_label() ) )); + } + + APPEND_M(str( "JMP #", loop )); + + APPEND_M(str( "#:", loop_exit )); +} + + +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/asm_main.h b/lib/chiavdf/fast_vdf/asm_main.h new file mode 100644 index 00000000..8e52d1b3 --- /dev/null +++ b/lib/chiavdf/fast_vdf/asm_main.h @@ -0,0 +1,250 @@ +#ifdef GENERATE_ASM_TRACKING_DATA + #ifndef COMPILE_ASM + extern "C" uint64 asm_tracking_data[num_asm_tracking_data]; + extern "C" char* asm_tracking_data_comments[num_asm_tracking_data]; + + uint64 asm_tracking_data[num_asm_tracking_data]; + char* asm_tracking_data_comments[num_asm_tracking_data]; + #endif +#endif + +namespace asm_code { + + +//all doubles are arrays with 2 entries. the high entry is first followed by the low entry +//so: b, a; u1, u0; v1, v0 +//is_lehmer is all 1s or all 0s. 
ab_threshold is duplicated twice +extern "C" int asm_func_gcd_base(double* ab, double* u, double* v, uint64* is_lehmer, double* ab_threshold, uint64* no_progress); +#ifdef COMPILE_ASM +void compile_asm_gcd_base() { + EXPAND_MACROS_SCOPE; + + asm_function c_func( "gcd_base", 6 ); + reg_alloc regs=c_func.regs; + + reg_vector ab=regs.bind_vector(m, "ab"); + reg_vector u=regs.bind_vector(m, "u"); + reg_vector v=regs.bind_vector(m, "v"); + reg_vector is_lehmer=regs.bind_vector(m, "is_lehmer"); + reg_vector ab_threshold=regs.bind_vector(m, "ab_threshold"); + + m.bind(c_func.args.at(0), "ab_addr"); + m.bind(c_func.args.at(1), "u_addr"); + m.bind(c_func.args.at(2), "v_addr"); + m.bind(c_func.args.at(3), "is_lehmer_addr"); + m.bind(c_func.args.at(4), "ab_threshold_addr"); + m.bind(c_func.args.at(5), "no_progress_addr"); + + APPEND_M(str( "MOVDQU `ab, [`ab_addr]" )); + APPEND_M(str( "MOVDQU `u, [`u_addr]" )); + APPEND_M(str( "MOVDQU `v, [`v_addr]" )); + APPEND_M(str( "MOVDQU `is_lehmer, [`is_lehmer_addr]" )); + APPEND_M(str( "MOVDQU `ab_threshold, [`ab_threshold_addr]" )); + + string no_progress_label=m.alloc_label(); + string progress_label=m.alloc_label(); + string exit_label=m.alloc_label(); + + gcd_base_continued_fraction(regs, ab, u, v, is_lehmer, ab_threshold, no_progress_label); + APPEND_M(str( "JMP #", progress_label )); + APPEND_M(str( "#:", no_progress_label )); + + APPEND_M(str( "MOV QWORD PTR [`no_progress_addr], 1" )); + APPEND_M(str( "JMP #", exit_label )); + + APPEND_M(str( "#:", progress_label )); + + APPEND_M(str( "MOV QWORD PTR [`no_progress_addr], 0" )); + + APPEND_M(str( "#:", exit_label )); + + APPEND_M(str( "MOVDQU [`ab_addr], `ab" )); + APPEND_M(str( "MOVDQU [`u_addr], `u" )); + APPEND_M(str( "MOVDQU [`v_addr], `v" )); + APPEND_M(str( "MOVDQU [`is_lehmer_addr], `is_lehmer" )); + APPEND_M(str( "MOVDQU [`ab_threshold_addr], `ab_threshold" )); +} +#endif + +//104 bytes +struct asm_func_gcd_128_data { + //4 + uint64 ab_start_0_0; + uint64 ab_start_0_8; + 
uint64 ab_start_1_0; + uint64 ab_start_1_8; + + //4 + uint64 u_0; + uint64 u_1; + uint64 v_0; + uint64 v_1; + + //5 + uint64 parity; //1 if odd, else 0 + uint64 is_lehmer; //1 if true, else 0 + uint64 ab_threshold_0; + uint64 ab_threshold_8; + uint64 no_progress; +}; + +extern "C" int asm_func_gcd_128(asm_func_gcd_128_data* data); +#ifdef COMPILE_ASM +void compile_asm_gcd_128() { + EXPAND_MACROS_SCOPE_PUBLIC; + + asm_function c_func( "gcd_128", 1 ); + reg_alloc regs_parent=c_func.regs; + + reg_spill spill_data_addr=regs_parent.bind_spill(m, "spill_data_addr"); + reg_spill spill_data=regs_parent.bind_spill(m, "spill_data", sizeof(asm_func_gcd_128_data), 8); + + assert(sizeof(asm_func_gcd_128_data)%8==0); + + { + EXPAND_MACROS_SCOPE; + reg_alloc regs=regs_parent; + + m.bind(c_func.args.at(0), "data_addr"); + + reg_scalar tmp=regs.bind_scalar(m, "tmp"); + + APPEND_M(str( "MOV `spill_data_addr, `data_addr" )); + + for (int x=0;x scalar_register_names_64={ + "RSP", // 0 - stack pointer; used by stack engine etc. not allocated + "RAX", // 1 - temporary; used for mul/div/etc. this is allocated last + "RDX", // 2 - temporary; used for mul/div/etc. allocated 2nd last + "RCX", // 3 - temporary; used for shr/etc. allocated 3rd last + "RBX", // 4 + "RBP", // 5 + "RSI", // 6 + "RDI", // 7 + "R8", // 8 + "R9", // 9 + "R10", // 10 + "R11", // 11 + "R12", // 12 + "R13", // 13 + "R14", // 14 + "R15" // 15 +}; + +const vector scalar_register_names_32={ + "ESP" , "EAX" , "EDX" , "ECX" , + "EBX" , "EBP" , "ESI" , "EDI" , + "R8D" , "R9D" , "R10D", "R11D", + "R12D", "R13D", "R14D", "R15D" +}; + +const vector scalar_register_names_16={ + "SP" , "AX" , "DX" , "CX" , + "BX" , "BP" , "SI" , "DI" , + "R8W" , "R9W" , "R10W", "R11W", + "R12W", "R13W", "R14W", "R15W" +}; + +const vector scalar_register_names_8={ + "SPL" , "AL" , "DL" , "CL" , + "BL" , "BPL" , "SIL" , "DIL" , + "R8B" , "R9B" , "R10B", "R11B", + "R12B", "R13B", "R14B", "R15B" +}; + +string to_hex(int128 i) { + int128 i_abs=(i<0)? 
-i : i; + assert(i_abs>=0); + assert(uint64(i_abs)==i_abs); + + ostringstream ss; + ss << ((i<0)? "-" : "") << "0x" << hex << uint64(i_abs); + return ss.str(); +} + +void str_impl(vector& out) {} + +template void str_impl( + vector& out, const type_a& a, const types&... targs +) { + out.push_back(to_string(a)); + str_impl(out, targs...); +} + +template string str(const string& t, const types&... targs) { + vector data; + str_impl(data, targs...); + + string res; + int next=0; + for (char c : t) { + if (c=='#') { + res+=data.at(next); + ++next; + } else { + res+=c; + } + } + assert(next==data.size()); + + return res; +} + +struct expand_macros_recording { + int start_pos=-1; + int end_pos=-1; + + ~expand_macros_recording() { + assert((start_pos==-1 && end_pos==-1) || (start_pos!=-1 && end_pos!=-1)); + } +}; + +struct expand_macros { + struct scope_data { + string scope_name; + map name_to_value; + bool is_public=false; + }; + + vector scopes; + map>> value_to_name; //int is scope + + vector> res_text; //first entry is tag + + int next_label_id=0; + int next_error_label_id=1; //can't be 0 since the id is used as the return code + int next_output_error_label_id=1; + + int num_active_recordings=0; + + vector tag_stack; + bool output_tags=false; + + void begin_recording(expand_macros_recording& res) { + assert(res.start_pos==-1 && res.end_pos==-1); + res.start_pos=res_text.size(); + ++num_active_recordings; + } + + vector> end_recording(expand_macros_recording& res) { + assert(res.start_pos!=-1 && res.end_pos==-1); + res.end_pos=res_text.size(); + --num_active_recordings; + + vector> c_text; + for (int x=res.start_pos;x> c_text) { + for (auto& c : c_text) { + res_text.push_back(c); + } + } + + string alloc_label() { + assert(num_active_recordings==0); + string res = "_label_" + to_string(next_label_id); + ++next_label_id; + return res; + } + + string alloc_error_label() { + assert(num_active_recordings==0); + string res = "label_error_" + to_string(next_error_label_id); 
+ ++next_error_label_id; + return res; + } + + void begin_scope(string name, bool is_public=false) { + scopes.emplace_back(scope_data()); + scopes.back().scope_name=name; + scopes.back().is_public=is_public; + } + + void end_scope() { + assert(!scopes.empty()); + for (pair& n : scopes.back().name_to_value) { + bool erase_res=value_to_name.at(n.second).erase(make_pair(scopes.size()-1, n.first)); + assert(erase_res); + } + scopes.pop_back(); + } + + void bind_impl(string name, string value) { + assert(!scopes.empty()); + + bool emplace_res_1=scopes.back().name_to_value.emplace(name, value).second; + assert(emplace_res_1); + + bool emplace_res_2=value_to_name[value].emplace(scopes.size()-1, name).second; + assert(emplace_res_2); + } + + string lookup_value(string name) { + for (int x=scopes.size()-1;x>=0;--x) { + if (x!=scopes.size()-1 && !scopes[x].is_public) { + continue; + } + + auto i=scopes[x].name_to_value.find(name); + if (i!=scopes[x].name_to_value.end()) { + return i->second; + } + } + + assert(false); + return ""; + } + + string describe_scope() { + string res; + for (auto& c : scopes) { + if (!res.empty()) { + res+="/"; + } + res+=c.scope_name; + } + return res; + } + + string describe_name(string name) { + string value=lookup_value(name); + + set>& names=value_to_name.at(value); + + string res; + res+=name; + res+="="; + res+=value; + + if (names.size()>=2) { + res+="("; + bool first=true; + for (auto& c : names) { + if (!first) { + res+=","; + } + if (c.second!=name) { + res+=c.second; + first=false; + } + } + res+=")"; + } + + return res; + } + + pair> expand(string s) { + string res; + vector res_names; + string buffer; + bool in_name=false; + + s+='\0'; + for (char c : s) { + if (in_name) { + if ((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z') || c=='_') { + buffer+=c; + } else { + in_name=false; + res+=lookup_value(buffer); + res_names.push_back(buffer); + buffer.clear(); + } + } + + if (!in_name) { + if (c=='`') { + in_name=true; + } 
else { + if (c!='\0') { + res+=c; + } + } + } + } + + return make_pair(res, res_names); + } + + void append(string s, int line, string file, string func) { + bool add_comment=true; + + assert(!s.empty()); + + auto r=expand(s); + + res_text.emplace_back(); + res_text.back().push_back((tag_stack.empty())? "" : tag_stack.back()); + res_text.back().push_back(r.first); + + if (add_comment) { + res_text.back().push_back( " # " + scopes.back().scope_name + ":" + to_string(line) + " " ); + res_text.back().push_back(s); + } + } + + template typename type::bindable bind(const type& a, string n) { + a.bind_impl(*this, n); + } + + template struct void_box { + typedef void value; + }; + + template typename void_box::value bind( + const type& a, string n + ) { + int x=0; + for (const auto& c : a) { + bind(c, n + "_" + to_string(x)); + ++x; + } + } + + string format_res_text() { + string res; + vector sizes; + + int next_line=1; + for (vector& c : res_text) { + string c_tag=c.at(0); + if (output_tags && !c_tag.empty()) { + c_tag = "_" + c_tag; + } + c.at(1)=str( "Xx_##: ", next_line, c_tag ) + c.at(1); + ++next_line; + + + for (int x=1;x& c : res_text) { + for (int x=1;x=0); + + const vector* names=nullptr; + if (num_bits==64) { + names=&scalar_register_names_64; + } else + if (num_bits==32) { + names=&scalar_register_names_32; + } else + if (num_bits==16) { + names=&scalar_register_names_16; + } else { + assert(num_bits==8); + names=&scalar_register_names_8; + } + + if (valuesize()) { + return names->at(value); + } else { + return str( "PSEUDO_#_#", value, num_bits ); + } + } + + typedef void bindable; + void bind_impl(expand_macros& m, string n) const { + m.bind_impl(n, name(64)); + m.bind_impl(n + "_32", name(32)); + m.bind_impl(n + "_16", name(16)); + m.bind_impl(n + "_8", name(8)); + } +}; + +const reg_scalar reg_rsp=reg_scalar(0); +const reg_scalar reg_rax=reg_scalar(1); +const reg_scalar reg_rdx=reg_scalar(2); +const reg_scalar reg_rcx=reg_scalar(3); +const reg_scalar 
reg_rbx=reg_scalar(4); +const reg_scalar reg_rbp=reg_scalar(5); +const reg_scalar reg_rsi=reg_scalar(6); +const reg_scalar reg_rdi=reg_scalar(7); +const reg_scalar reg_r8=reg_scalar(8); +const reg_scalar reg_r9=reg_scalar(9); +const reg_scalar reg_r10=reg_scalar(10); +const reg_scalar reg_r11=reg_scalar(11); +const reg_scalar reg_r12=reg_scalar(12); +const reg_scalar reg_r13=reg_scalar(13); +const reg_scalar reg_r14=reg_scalar(14); +const reg_scalar reg_r15=reg_scalar(15); + +struct reg_vector { + static const bool is_spill=false; + + int value=-1; + + reg_vector() {} + explicit reg_vector(int i) : value(i) {} + + string name(int num_bits=512) const { + assert(value>=0); + + string prefix; + if (num_bits==512) { + prefix = "Z"; + } else + if (num_bits==256) { + prefix = "Y"; + } else { + assert(num_bits==128); + prefix = "X"; + } + + if (value>=32 || (!enable_all_instructions && (value>=16 || num_bits!=128))) { + prefix = "PSEUDO_" + prefix; + } + + return str( "#MM#", prefix, value ); + } + + typedef void bindable; + void bind_impl(expand_macros& m, string n) const { + m.bind_impl(n, name(128)); + m.bind_impl(n + "_512", name(512)); + m.bind_impl(n + "_256", name(256)); + m.bind_impl(n + "_128", name(128)); + } +}; + +struct reg_spill { + static const bool is_spill=true; + + int value=-1; //byte offset + int size=-1; + int alignment=-1; //power of 2, up to 64 + + reg_spill() {} + reg_spill(int t_value, int t_size, int t_alignment) : value(t_value), size(t_size), alignment(t_alignment) {} + + int get_rsp_offset() const { + return value-spill_bytes; + } + + //this is negative + uint64 get_rsp_offset_uint64() const { + return uint64(value-spill_bytes); + } + + string name() const { + assert(value>=0 && size>=1 && alignment>=1); + assert(value%alignment==0); + assert(value+size<=spill_bytes); + + return str( "[RSP+#]", to_hex(value-spill_bytes) ); + } + + typedef void bindable; + void bind_impl(expand_macros& m, string n) const { + m.bind_impl(n, name()); + 
m.bind_impl(n + "_rsp_offset", to_hex(value-spill_bytes)); + } + + reg_spill operator+(int byte_offset) const { + reg_spill res=*this; + res.value+=byte_offset; + res.size-=byte_offset; + res.alignment=1; + return res; + } +}; + +struct reg_alloc { + vector order_to_scalar; + vector scalar_to_order; + + set scalars; + set vectors; + vector spills; + + reg_alloc() {} + + void add(reg_scalar s) { + bool insert_res=scalars.insert(scalar_to_order.at(s.value)).second; + assert(insert_res); + } + + void init() { + const int num=32; //defines how many pseudo-registers to have + + order_to_scalar.resize(num, -1); + scalar_to_order.resize(num, -1); + + int next_order=0; + auto add_scalar=[&](reg_scalar scalar_reg) { + int scalar=scalar_reg.value; + + int order=next_order; + ++next_order; + + assert(order_to_scalar.at(order)==-1); + order_to_scalar.at(order)=scalar; + + assert(scalar_to_order.at(scalar)==-1); + scalar_to_order.at(scalar)=order; + + add(reg_scalar(scalar)); + }; + + add_scalar(reg_rbx); + add_scalar(reg_rbp); + add_scalar(reg_rsi); + add_scalar(reg_rdi); + add_scalar(reg_r8); + add_scalar(reg_r9); + add_scalar(reg_r10); + add_scalar(reg_r11); + add_scalar(reg_r12); + add_scalar(reg_r13); + add_scalar(reg_r14); + add_scalar(reg_r15); + + add_scalar(reg_rcx); + add_scalar(reg_rdx); + add_scalar(reg_rax); + + for (int x=16;x=spills.size() || !spills[x+y]) { + valid=false; + break; + } + } + + if (valid) { + for (int y=0;y lanes) { + int res=0; + for (int x=0;x<4;++x) { + int lane=lanes[x]; + assert(lane>=0 && lane<4); + res|=lane << (2*x); + } + return to_hex(res); +} + +string vpblendd_mask_4(array lanes) { + int res=0; + for (int x=0;x<4;++x) { + int lane=lanes[x]; + assert(lane>=0 && lane<2); + res|=((lane==1)? 3 : 0) << (2*x); + } + return to_hex(res); +} + +string vpblendd_mask_8(array lanes) { + int res=0; + for (int x=0;x<8;++x) { + int lane=lanes[x]; + assert(lane>=0 && lane<2); + res|=((lane==1)? 
1 : 0) << x; + } + return to_hex(res); +} + +struct asm_function { + string name; + + //this excludes the argument regs (if any). can add them after they are done being used + reg_alloc regs; + + vector args; + + vector pop_regs; + const vector all_save_regs={reg_rbp, reg_rbx, reg_r12, reg_r13, reg_r14, reg_r15}; + const vector all_arg_regs={reg_rdi, reg_rsi, reg_rdx, reg_rcx, reg_r8, reg_r9}; + + //the scratch area ends at RSP (i.e. the last byte is at address RSP-1) + //RSP is 64-byte aligned + //RSP must be preserved but all other registers can be changed + // + //the arguments are stored in: RDI, RSI, RDX, RCX, R8, R9 + //each argument is up to 8 bytes + asm_function(string t_name, int num_args=0, int num_regs=15) { + EXPAND_MACROS_SCOPE; + + static bool outputted_header=false; + if (!outputted_header) { + APPEND_M(str( ".intel_syntax noprefix" )); + outputted_header=true; + } + + name=t_name; + +#ifdef CHIAOSX + APPEND_M(str( ".global _asm_func_#", t_name )); + APPEND_M(str( "_asm_func_#:", t_name )); +#else + APPEND_M(str( ".global asm_func_#", t_name )); + APPEND_M(str( "asm_func_#:", t_name )); +#endif + + assert(num_regs<=15); + regs.init(); + + for (int x=0;xnum_available_regs) { + APPEND_M(str( "PUSH #", s.name() )); + pop_regs.push_back(s); + ++num_available_regs; + } else { + regs.get_scalar(s); + } + } + assert(num_available_regs==num_regs); + + // RSP'=RSP&(~63) ; this makes it 64-aligned and can only reduce its value + // RSP''=RSP'-64 ; still 64-aligned but now there is at least 64 bytes of unused stuff + // [RSP'']=RSP ; store old value in unused area + APPEND_M(str( "MOV RAX, RSP" )); + APPEND_M(str( "AND RSP, -64" )); //-64 equals ~63 + APPEND_M(str( "SUB RSP, 64" )); + APPEND_M(str( "MOV [RSP], RAX" )); + } + + //the return value is the error code (0 if no error). 
it is put in RAX + ~asm_function() { + EXPAND_MACROS_SCOPE; + + //default return value of 0 + APPEND_M(str( "MOV RAX, 0" )); + + string end_label=m.alloc_label(); + APPEND_M(str( "#:", end_label )); + //this takes 4 cycles including ret, if there is nothing else to do + APPEND_M(str( "MOV RSP, [RSP]" )); + for (int x=pop_regs.size()-1;x>=0;--x) { + APPEND_M(str( "POP #", pop_regs[x].name() )); + } + APPEND_M(str( "RET" )); + + while (m.next_output_error_label_id=0 && size>0 && start+size<=128); + + uint128 v=(uint128(high)<<64) | uint128(low); + v>>=start; + v&=~(uint128(1)<=0 && start<64); + assert(size>=0 && start+size<=64); + + t >>= start; + t &= (1ull<=0 && start<64); + assert(size>=0 && start+size<=64); + assert( + ( bits & ~((1ull<0 && size<64); + assert( + ( bits & ~((1ull<=0;--x) { + bool v=bits&(1ull<=1 && b<2 + double_bits b; + b.set_exponent(0); + b.fraction=i; + + //res_1>=0 && res_1<1 + double res_1=b.to_double(); //1 bitwise or (for the exponent) + + double res=fma(res_1, d_exp2(52), -d_exp2(52)); + + //double_bits res_b=res_1-1; + //res_b.exponent+=52; //can't overflow; 1 uint64 add without shifts. 
can also use a 32/16 bit add or a double multiply + //double res=res_b.to_double(); + + assert(res==i); + return res; +} + +//can make this handle shifted doubles easily +uint64 int_from_double(double v, bool exact=true) { + if (exact) { + uint64 v_test=v; + assert(v_test==v); + assert(v_test<(1ull<<52)); + } + + double res_1=fma(v, d_exp2(-52), 1); //one fma + + double_bits b(res_1); + uint64 res=b.fraction; //1 bitwise and (for exponent) + + if (exact) { + assert(res==v); + } + return res; +} + +uint64 make_uint64(uint32 high, uint32 low) { + return uint64(high)<<32 | uint32(low); +} + +uint128 make_uint128(uint64 high, uint64 low) { + return uint128(high)<<64 | uint128(low); +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gcd_128.h b/lib/chiavdf/fast_vdf/gcd_128.h new file mode 100644 index 00000000..0ea76b9a --- /dev/null +++ b/lib/chiavdf/fast_vdf/gcd_128.h @@ -0,0 +1,245 @@ +bool gcd_128( + array& ab, array, 2>& uv_uint64, int& uv_uint64_parity, bool is_lehmer, uint128 ab_threshold=0 +) { + static int test_asm_counter=0; + ++test_asm_counter; + + bool test_asm_run=true; + bool test_asm_print=false; //(test_asm_counter%1000==0); + bool debug_output=false; + + if (debug_output) { + cerr.setf(ios::fixed, ios::floatfield); + //cerr.setf(ios::showpoint); + } + + assert(ab[0]>=ab[1] && ab[1]>=0); + + uv_uint64={ + array{1, 0}, + array{0, 1} + }; + uv_uint64_parity=0; + + array ab_start=ab; + + bool progress=false; + int iter=0; + + while (true) { + if (debug_output) print( + "======== 1:", iter, + uint64(ab[0]), uint64(ab[0]>>64), uint64(ab[1]), uint64(ab[1]>>64), + uint64(ab_threshold), uint64(ab_threshold>>64) + ); + + if (ab[1]<=ab_threshold) { + break; + } + + assert(ab[0]>=ab[1] && ab[1]>=0); + + int a_zeros=0; + + //this uses CMOV + if ((ab[0]>>64)!=0) { + uint64 a_high(ab[0]>>64); + assert(a_high!=0); + a_zeros=__builtin_clzll(a_high); + } else { + uint64 a_low(ab[0]); + assert(a_low!=0); + a_zeros=64+__builtin_clzll(a_low); + } + + int 
a_num_bits=128-a_zeros; + if (is_lehmer) { + const int min_bits=96; + if (a_num_bits>shift_amount)), + double(uint64(ab[1]>>shift_amount)) + }; + double ab_threshold_double(uint64(ab_threshold>>shift_amount)); + + if (debug_output) print( "3:", ab_double[0], ab_double[1], ab_threshold_double, is_lehmer || (shift_amount!=0) ); + + vector2 ab_double_2=ab_double; + + //this doesn't need to be exact + //all of the comparisons with threshold are >, so this shouldn't be required + //if (shift_amount!=0) { + // ++ab_threshold_double; + //} + + //void gcd_64(vector2 start_a, pair& res, int& num_iterations, bool approximate, int max_iterations) { + //} + + matrix2 uv_double; + if (!gcd_base_continued_fraction(ab_double, uv_double, is_lehmer || (shift_amount!=0), ab_threshold_double)) { + print( " gcd_128 break 1" ); //this is fine + break; + } + + if (debug_output) print( "4:", uv_double[0][0], uv_double[1][0], uv_double[0][1], uv_double[1][1], ab_double[0], ab_double[1] ); + + if (0) { + matrix2 uv_double_2; + if (!gcd_base_continued_fraction_2(ab_double_2, uv_double_2, is_lehmer || (shift_amount!=0), ab_threshold_double)) { + print( " gcd_128 break 2" ); + break; + } + + assert(uv_double==uv_double_2); + assert(ab_double==ab_double_2); + } + + array,2> uv_double_int={ + array{uint64(abs(uv_double[0][0])), uint64(abs(uv_double[0][1]))}, + array{uint64(abs(uv_double[1][0])), uint64(abs(uv_double[1][1]))} + }; + + int uv_double_parity=(uv_double[1][1]<0)? 
1 : 0; //sign bit + + array, 2> uv_uint64_new; + if (iter==0) { + uv_uint64_new=uv_double_int; + } else { + if (!multiply_exact(uv_double_int, uv_uint64, uv_uint64_new)) { + print( " gcd_128 slow 1" ); //calculated a bunch of quotients and threw all of them away, which is bad + break; + } + } + + int uv_uint64_parity_new=uv_uint64_parity^uv_double_parity; + bool even=(uv_uint64_parity_new==0); + + if (debug_output) print( + "5:", uv_uint64_new[0][0], uv_uint64_new[1][0], uv_uint64_new[0][1], uv_uint64_new[1][1], uv_uint64_parity_new + ); + + uint64 uv_00=uv_uint64_new[0][0]; + uint64 uv_01=uv_uint64_new[0][1]; + uint64 uv_10=uv_uint64_new[1][0]; + uint64 uv_11=uv_uint64_new[1][1]; + + uint128 a_new_1=ab_start[0]; a_new_1*=uv_00; //a_new_1.set_negative(!even); + uint128 a_new_2=ab_start[1]; a_new_2*=uv_01; //a_new_2.set_negative(even); + uint128 b_new_1=ab_start[1]; b_new_1*=uv_11; //b_new_1.set_negative(!even); + uint128 b_new_2=ab_start[0]; b_new_2*=uv_10; //b_new_2.set_negative(even); + + //CMOV + //print( " gcd_128 even", even ); + if (!even) { + swap(a_new_1, a_new_2); + swap(b_new_1, b_new_2); + } + + uint128 a_new_s=a_new_1-a_new_2; + uint128 b_new_s=b_new_1-b_new_2; + + //if this assert hit, one of the quotients is wrong. the base case is not supposed to return incorrect quotients + //assert(a_new_s>=b_new_s && b_new_s>=0); + //commenting this out because a and b can be 128 bits now + + //if (!(a_new_s>=b_new_s && b_new_s>=0)) { + //print( " gcd_128 slow 2" ); + //break; + //} + + uint128 a_new(a_new_s); + uint128 b_new(b_new_s); + + if (debug_output) print( "6:", uint64(a_new), uint64(a_new>>64), uint64(b_new), uint64(b_new>>64) ); + + if (is_lehmer) { + assert(a_new>=b_new); + uint128 ab_delta=a_new-b_new; + + // even: + // +uv_00 -uv_01 + // -uv_10 +uv_11 + + uint128 u_delta=uint128(uv_10)+uint128(uv_00); //even: negative. odd: positive + uint128 v_delta=uint128(uv_11)+uint128(uv_01); //even: positive. 
odd: negative + + // uv_10 is negative if even, positive if odd + // uv_11 is positive if even, negative if odd + bool passed_even=(b_new>=uint128(uv_10) && ab_delta>=v_delta); + bool passed_odd=(b_new>=uint128(uv_11) && ab_delta>=u_delta); + + if (debug_output) print( "7:", passed_even, passed_odd ); + + //CMOV + if (!(even? passed_even : passed_odd)) { + print( " gcd_128 slow 5" ); //throwing away a bunch of quotients because the last one is bad + break; + } + } + + if (a_new<=ab_threshold) { + if (debug_output) print( "8:" ); + print( " gcd_128 slow 6" ); //still throwing away quotients + break; + } + + ab={a_new, b_new}; + uv_uint64=uv_uint64_new; + uv_uint64_parity=uv_uint64_parity_new; + progress=true; + + ++iter; + if (iter>=gcd_128_max_iter) { + if (debug_output) print( "9:" ); + break; //this is the only way to exit the loop without wasting quotients + } + + //todo break; + } + + #ifdef TEST_ASM + #ifndef GENERATE_ASM_TRACKING_DATA + if (test_asm_run) { + if (test_asm_print) { + print( "test asm gcd_128", test_asm_counter ); + } + + asm_code::asm_func_gcd_128_data asm_data; + + asm_data.ab_start_0_0=uint64(ab_start[0]); + asm_data.ab_start_0_8=uint64(ab_start[0]>>64); + asm_data.ab_start_1_0=uint64(ab_start[1]); + asm_data.ab_start_1_8=uint64(ab_start[1]>>64); + + asm_data.is_lehmer=uint64(is_lehmer); + asm_data.ab_threshold_0=uint64(ab_threshold); + asm_data.ab_threshold_8=uint64(ab_threshold>>64); + + int error_code=asm_code::asm_func_gcd_128(&asm_data); + + assert(error_code==0); + assert(asm_data.u_0==uv_uint64[0][0]); + assert(asm_data.u_1==uv_uint64[1][0]); + assert(asm_data.v_0==uv_uint64[0][1]); + assert(asm_data.v_1==uv_uint64[1][1]); + assert(asm_data.parity==uv_uint64_parity); + assert(asm_data.no_progress==int(!progress)); + } + #endif + #endif + + return progress; +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gcd_base_continued_fractions.h b/lib/chiavdf/fast_vdf/gcd_base_continued_fractions.h new file mode 100644 index 
00000000..eee1255c --- /dev/null +++ b/lib/chiavdf/fast_vdf/gcd_base_continued_fractions.h @@ -0,0 +1,757 @@ +typedef array vector2; +typedef array matrix2; + +matrix2 identity_matrix() { + return { + vector2{1, 0}, + vector2{0, 1} + }; +} + +matrix2 quotient_matrix(double q) { + assert(int64(q)==q); + + return { + vector2{0, 1}, + vector2{1, -q} + }; +} + +bool range_check(double v) { + //this is the smallest value where you can add 1 exactly + //if you add 2, you get the same value as if you added 1 + //if two floats are added/subtracted and there is a loss of precision, the absolute value of the result will be greater than this + //same with multiplication and fma + //(all of the doubles are integers whether they are exact or not) + return abs(v)<=double((1ull<<53)-1); +} + +bool dot_product_exact(vector2 a, vector2 b, double& v, bool result_always_in_range=false) { + v=a[0]*b[0]; + if (!range_check(v)) { + return false; + } + + if (enable_fma_in_c_code) { + v=fma(a[1], b[1], v); + } else { + double v2=a[1]*b[1]; + if (!range_check(v2)) { + return false; + } + + v+=v2; + } + + if (result_always_in_range) { + //still need the first range_check since the intermediate value might not be in range + assert(range_check(v)); + } + + return range_check(v); +} + +//result_always_in_range ignored +bool dot_product_exact(array a, array b, uint64& v, bool result_always_in_range=false) { + uint64 t1; + if (__builtin_mul_overflow(a[0], b[0], &t1)) { + return false; + } + + uint64 t2; + if (__builtin_mul_overflow(a[1], b[1], &t2)) { + return false; + } + + return !__builtin_add_overflow(t1, t2, &v); +} + +template bool multiply_exact( + array,2> a, array b, array& v, bool result_always_in_range=false) { + return + dot_product_exact(a[0], b, v[0], result_always_in_range) && + dot_product_exact(a[1], b, v[1], result_always_in_range) + ; +} + +template bool multiply_exact( + array,2> a, array,2> b, array,2>& v, bool result_always_in_range=false +) { + return + 
dot_product_exact(a[0], array{b[0][0], b[1][0]}, v[0][0], result_always_in_range) && + dot_product_exact(a[0], array{b[0][1], b[1][1]}, v[0][1], result_always_in_range) && + dot_product_exact(a[1], array{b[0][0], b[1][0]}, v[1][0], result_always_in_range) && + dot_product_exact(a[1], array{b[0][1], b[1][1]}, v[1][1], result_always_in_range) + ; +} + +struct continued_fraction { + vector values; + + matrix2 get_matrix() { + matrix2 res=identity_matrix(); + + for (int i : values) { + bool is_exact=multiply_exact(quotient_matrix(i), res, res); + assert(is_exact); + } + + return res; + } + + bool truncate(double max_matrix_value) { + bool res=false; + + while (true) { + matrix2 m=get_matrix(); + double max_value=max( + max(abs(m[0][0]), abs(m[0][1])), + max(abs(m[1][0]), abs(m[1][1])) + ); + + if (max_value>max_matrix_value) { + assert(!values.empty()); + values.pop_back(); + res=true; + } else { + break; + } + } + + return res; + } + + bool is_superset_of(continued_fraction& targ) { + if (values.size()>targ.values.size()) { + return false; + } + + for (int x=0;x=0;--x) { + assert(values[x]>=1); + + if (first) { + //the denominator of each fraction is between 1 and infinity + //this is already canonicalized + res=values[x] + (parity? 
1 : 0); + } else { + //mpq_class(values[x]) is already canonicalized + res=mpq_class(values[x]) + one/res; + } + + first=false; + } + + double res_double=res.get_d(); + { + mpq_class res_double_mpq(res_double); + res_double_mpq.canonicalize(); + + is_exact=(res_double_mpq==res); + } + return res_double; + } + + //everything inside the bound starts with this continued fraction + //something outside the bound might also start with this continued fraction + //>= first, < second + pair get_bound() { + bool a_exact=false; + double a=get_bound(false, a_exact); + + bool b_exact=false; + double b=get_bound(true, b_exact); + + if (a>b) { + swap(a, b); + swap(a_exact, b_exact); + } + + if (!a_exact) { + //if a isn't exact, the next double value after a is inside the continued fraction (since it got rounded down). this assumes + // the bound isn't so small that it is close to the double machine epsilon; this is checked later by the double_table code + //if a is exact then it is inside the continued fraction + a=nextafter(a, HUGE_VAL); + } + + //if b isn't exact, then it got rounded down and the b value is inside the continued fraction. 
the next value after b will + // be outside the continued fraction + //if b is exact then it is also inside the continued fraction and the next value is outside + b=nextafter(b, HUGE_VAL); + + return make_pair(a, b); + } +}; + +//if you add 1 to the integer representation of a positive double, it will increase the value by 1 machine epsilon (assuming no overflow) +template struct double_table { + vector data; //data[x] is >= range_start+x*delta and < range_start+(x+1)*delta + + int exponent_bits; + int fraction_bits; + + int64 range_start=0; + int64 range_end=0; + int64 delta=0; + + double range_start_double=0; + double range_end_double=0; + + int right_shift_amount=0; + uint64 range_start_shifted=0; + uint64 range_end_shifted=0; + + //min value is 1 + double_table(int t_exponent_bits, int t_fraction_bits) { + exponent_bits=t_exponent_bits; + fraction_bits=t_fraction_bits; + + assert(exponent_bits>=0); + assert(fraction_bits>=1); + + double_bits range_start_bits; + range_start_bits.sign=false; + range_start_bits.set_exponent(0); + range_start_bits.fraction=0; + range_start=range_start_bits.to_uint64(); + range_start_double=range_start_bits.to_double(); + + double_bits range_end_bits; + range_end_bits.sign=false; + range_end_bits.set_exponent(1<range_start); + assert(range_start%delta==0); + assert(range_end%delta==0); + assert((range_end-range_start)/delta==1ull<<(exponent_bits+fraction_bits)); + + data.resize(1ull<<(exponent_bits+fraction_bits)); + + right_shift_amount=double_bits::fraction_num_bits-fraction_bits; + range_start_shifted=uint64(range_start)>>right_shift_amount; + range_end_shifted=uint64(range_end)>>right_shift_amount; + } + + pair index_range(int x) { + int64 res_low=range_start+x*delta; + int64 res_high=range_start+(x+1)*delta; + return make_pair(*(double*)&res_low, *(double*)&res_high); + } + + bool lookup(double v, type& res) { + assert(v>=1); + + res=type(); + + uint64 v_bits=*(uint64*)&v; + uint64 v_bits_shifted=v_bits>>right_shift_amount; + + 
assert(v_bits_shifted>=range_start_shifted); //since v>=1 + if (v_bits_shifted=range_end_shifted) { + return false; + } + + //the table doesn't work if v is exactly between two slots + //happens if the remainder is 0 for one of the quotients + if ( + (v_bits & (delta-1)) == 0 || + (v_bits & (delta-1)) == delta-1 + ) { + return false; + } + + res=data.at(v_bits_shifted-range_start_shifted); + return true; + } + + //will assign all entries >= range.first and < range.second + //returns true if the range is at least 0.5 entries wide (for that area of the table) and is within the table bounds + bool assign(pair range, type value, vector& old_values) { + old_values.clear(); + + double start_double=range.first; + double end_double=range.second; + + assert(start_double>0 && end_double>0 && end_double>=start_double && isfinite(start_double) && isfinite(end_double)); + + if (end_doublerange_end_double) { + return false; + } + + int64 start_bits=*(int64*)&start_double; + int64 end_bits=*(int64*)&end_double; + + if (end_bits<=start_bits || 2*(end_bits-start_bits)=start_pos); + + if (start_pos<0) { + start_pos=0; + } + + if (end_pos>data.size()) { + end_pos=data.size(); + } + + for (uint64 pos=start_pos;pos slot_range=index_range(pos); + + //if start_double==slot_range.first, then both ranges have the same starting double so that's fine + //if end_double==slot_range.second, then both ranges have the same ending double which is also fine + if (start_double<=slot_range.first && end_double>=slot_range.second) { + old_values.push_back(data[pos]); + data[pos]=value; + } + } + + return true; + } +}; + +bool add_to_table(double_table& c_table, continued_fraction f) { + vector old_values; + if (!c_table.assign(f.get_bound(), f, old_values)) { + return false; + } + + for (continued_fraction& c : old_values) { + assert(c.is_superset_of(f)); + } + + return true; +} + +void add_children_to_table(double_table& c_table, continued_fraction f) { + f.values.push_back(1); + + while (true) { + if 
(!add_to_table(c_table, f)) { + break; + } + + add_children_to_table(c_table, f); + + assert(f.values.back() generate_table( + int exponent_bits, int fraction_bits, uint64 truncate_max_value=1ull<<53, bool output_stats=false, bool dump=false +) { + double_table c_table(exponent_bits, fraction_bits); + add_children_to_table(c_table, continued_fraction()); + + bool any_truncated=false; + for (continued_fraction& c : c_table.data) { + assert(double(truncate_max_value)==truncate_max_value); + any_truncated |= c.truncate(truncate_max_value); + } + + //if the exponent has too many bits, some of the table entries will span multiple integers and won't have any entries + //all of the full entries are at the start of the table, and all of the empty entires are at the end. they aren't interleaved + //when setting up the table range checks, should truncate off all of the empty values so they won't affect cache coherency + int num_empty=0; + + for (int x=0;x 0 +// > 0 < 0} +// even parity uv: { >=0 <=0s +// <=0 > 0} + +//if this returns false then the new values are invalid and the old values are valid +//this works if u/v are unsigned, if v[1]-v[0] is replaced with |v[1]|+|v[0]| and -u[1] is replaced with |u1| etc +bool check_lehmer(array a, array u, array v) { + // a[0]-a[1] is always >= 0 ; also a[1]>=0 + // odd parity ; u[0]<=0 ; u[1]> 0 ; v[0]> 0 ; v[1]< 0 + // even parity ; u[0]>=0 ; u[1]<=0 ; v[0]<=0 ; v[1]> 0 + return + a[1]>=-u[1] && int128(a[0])-int128(a[1]) >= int128(v[1])-int128(v[0]) && // even parity + a[1]>=-v[1] && int128(a[0])-int128(a[1]) >= int128(u[1])-int128(u[0]) // odd parity + ; +} + +bool gcd_base_continued_fraction(vector2& ab, matrix2& uv, bool is_lehmer, double ab_threshold=0) { + static double_table c_table=generate_table(gcd_table_num_exponent_bits, gcd_table_num_fraction_bits); + + static int test_asm_counter=0; + ++test_asm_counter; + + bool test_asm_run=true; + bool test_asm_print=false; //(test_asm_counter%1000==0); + bool debug_output=false; + 
+ assert(ab[0]>=ab[1] && ab[1]>=0); + + uv=identity_matrix(); + + auto ab_start=ab; + + bool progress=false; + bool enable_table=true; + + int iter=0; + int iter_table=0; + int iter_slow=0; + + if (debug_output) { + cerr.setf(ios::fixed, ios::floatfield); + //cerr.setf(ios::showpoint); + } + + while (true) { + if (debug_output) print( "======== 1:", iter, ab[1], ab_threshold); + + if (ab[1]<=ab_threshold) { + if (debug_output) print( "1.5:" ); + break; + } + + //print( " gcd_base", uint64(ab[0]) ); + + assert(ab[0]>=ab[1] && ab[1]>=0); + + double q=ab[0]/ab[1]; + + if (debug_output) print( "2:", q ); + + vector2 new_ab; + matrix2 new_uv; + + bool used_table=false; + + continued_fraction f; + if (enable_table && c_table.lookup(q, f)) { + assert(!f.values.empty()); //table should be set up not to have empty values + + if (debug_output) print( "3:", f.get_matrix()[0][0], f.get_matrix()[1][0], f.get_matrix()[0][1], f.get_matrix()[1][1] ); + + bool new_ab_valid=multiply_exact(f.get_matrix(), ab, new_ab, true); //a and b can only be reduced in magnitude + bool new_uv_valid=multiply_exact(f.get_matrix(), uv, new_uv); + bool new_a_valid=(new_ab[0]>ab_threshold); + + if (debug_output) print( "4:", new_ab_valid, new_uv_valid, new_a_valid ); + if (debug_output) print( "5:", new_ab[0], new_ab[1], new_uv[0][0], new_uv[1][0], new_uv[0][1], new_uv[1][1] ); + + if (new_ab_valid && new_uv_valid && new_a_valid) { + used_table=true; + ++iter_table; + } else { + //this should be disabled to make the output the same as the non-table version + //this is disabled in the asm version + //if (is_lehmer && ab_threshold==0) { + //can also bypass the table but it is probably slower + //if ab_threshold is not 0, need to keep going since the partial gcd is about to terminate + //break; + //} + } + } + + if (!used_table) { + //the native instruction is as fast as adding then subtracting some magic number + q=floor(q); + ++iter_slow; + + if (debug_output) print( "6:", q ); + + matrix2 
m=quotient_matrix(q); + + bool new_ab_valid=multiply_exact(m, ab, new_ab, true); + bool new_uv_valid=multiply_exact(m, uv, new_uv); + + if (debug_output) print( "6.5:", new_ab[0], new_ab[1], new_uv[0][0], new_uv[1][0], new_uv[0][1], new_uv[1][1] ); + + if (!new_ab_valid || !new_uv_valid) { + if (debug_output) print( "7:" ); + break; + } + + //double new_b=fma(-q, ab[1], ab[0]); + + //double new_u; + //double new_v; + + //iter 0 is unrolled separately + //can probably just unroll all 6 iterations + //if (iter==0) { + //new_u=1; + //new_v=-q; + //} else {//} + + //new_u=fma(-q, uv[1][0], uv[0][0]); + //new_v=fma(-q, uv[1][1], uv[0][1]); + + //if (debug_output) print( "6:", q, new_b, new_u, new_v ); + + //if (!range_check(new_u) || !range_check(new_v)) { + //if (debug_output) print( "7" ); + //break; + //} + + //assert(range_check(new_b)); //a and b can only be reduced in magnitude + + //new_ab={ab[1], new_b}; + //new_uv={ + //vector2{uv[1][0], uv[1][1]}, + //vector2{ new_u, new_v} + //}; + } + + //this has to be checked on the first iteration if the table is not used (since there could be a giant quotient e.g. a=b) + //will check it even if the table is used. shouldn't affect performance + if (is_lehmer) { + double ab_delta=new_ab[0]-new_ab[1]; + assert(range_check(ab_delta)); //both are nonnegative so the subtraction can't increase the magnitude + assert(ab_delta>=0); //ab[0] has to be greater + + //the magnitudes add for these + //however, the comparison is ab_delta >= u_delta or v_delta, and ab_delta>=0, so the values of u_delta and v_delta can + // be increased. if the calculation is not exact, the values will be ceil'ed so they are exact or increased; never reduced + //double u_delta=uv[1][0]-uv[0][0]; + //double v_delta=uv[1][1]-uv[0][1]; + + //even parity: + //don't care what the result of the odd comparison is as far as correctness goes. 
for performance, it has to be true most + // of the time + // uv[0][1]<=0 ; uv[1][1]>=0 + //ab_delta+uv[0][1] is exact because the signs are opposite + //ab_delta+uv[0][0] is <= the true value so the comparison might return false wrongly. should be fine + + bool even=(new_uv[1][1]>=0); + + if (even) { + assert(range_check(ab_delta+new_uv[0][1])); + } else { + assert(range_check(ab_delta+new_uv[0][0])); + } + + bool passed= + new_ab[1]>=-new_uv[1][0] && ab_delta+new_uv[0][1]>=new_uv[1][1] && // even parity. for odd parity this is always true + new_ab[1]>=-new_uv[1][1] && ab_delta+new_uv[0][0]>=new_uv[1][0] // odd parity. for even parity this is always true + ; + + if (debug_output) print( "8:", new_ab[1], new_uv[1][0], ab_delta, new_uv[0][1], new_uv[1][1] ); + if (debug_output) print( "9:", new_ab[1], new_uv[1][1], ab_delta, new_uv[0][0], new_uv[1][0] ); + if (debug_output) print( "10:", passed ); + + if (!passed) { + if (debug_output) print( "11:" ); + + if (enable_table) { + //this will make the table not change the output of the algorithm + //can just do a break in the actual code + //enable_table=false; continue; + + break; + } else { + break; + } + } + } + + ab=new_ab; + uv=new_uv; + progress=true; + ++iter; + + //print( " gcd_base quotient", q ); + + //print( "foo" ); + { + //this would overflow a double; it works with modular arithmetic + int64 a_expected=int64(uv[0][0])*int64(ab_start[0]) + int64(uv[0][1])*int64(ab_start[1]); + int64 b_expected=int64(uv[1][0])*int64(ab_start[0]) + int64(uv[1][1])*int64(ab_start[1]); + assert(int64(ab[0])==a_expected); + assert(int64(ab[1])==b_expected); + } + + if (iter>=gcd_base_max_iter) { + break; + } + + //todo break; + } + + //print( " gcd_base", iter_table+iter_slow, iter_table, iter_slow ); + + #ifdef TEST_ASM + #ifndef GENERATE_ASM_TRACKING_DATA + if (test_asm_run) { + if (test_asm_print) { + print( "test asm gcd_base", test_asm_counter ); + } + + double asm_ab[]={ab_start[0], ab_start[1]}; + double asm_u[2]; + 
double asm_v[2]; + uint64 asm_is_lehmer[2]={(is_lehmer)? ~0ull : 0ull, (is_lehmer)? ~0ull : 0ull}; + double asm_ab_threshold[2]={ab_threshold, ab_threshold}; + uint64 asm_no_progress; + int error_code=asm_code::asm_func_gcd_base(asm_ab, asm_u, asm_v, asm_is_lehmer, asm_ab_threshold, &asm_no_progress); + + assert(error_code==0); + assert(asm_ab[0]==ab[0]); + assert(asm_ab[1]==ab[1]); + assert(asm_u[0]==uv[0][0]); + assert(asm_u[1]==uv[1][0]); + assert(asm_v[0]==uv[0][1]); + assert(asm_v[1]==uv[1][1]); + assert(asm_no_progress==int(!progress)); + } + #endif + #endif + + return progress; +} + +bool gcd_base_continued_fraction_2(vector2& ab_double, matrix2& uv_double, bool is_lehmer, double ab_threshold_double=0) { + int64 a_int=int64(ab_double[0]); + int64 b_int=int64(ab_double[1]); + int64 threshold_int=int64(ab_threshold_double); + + assert(a_int>b_int && b_int>0); + + array ab={a_int, b_int}; + array u={1, 0}; + array v={0, 1}; + + auto apply=[&](int64 q, array x) -> array { + return { + x[1], + x[0]-q*x[1] + }; + }; + + vector res; + + int num_iter=0; + int num_quotients=0; + + while (ab[1]>threshold_int) { + //print( " gcd_base_2", ab[0] ); + + int64 q=ab[0]/ab[1]; + assert(q>=0); + + array new_ab=apply(q, ab); + array new_u=apply(q, u); + array new_v=apply(q, v); + + ++num_iter; + + if (is_lehmer && !check_lehmer(new_ab, new_u, new_v)) { + break; + } + + //print(num_iter, u[0], u[1], v[0], v[1]); + + auto ab_double_new=ab_double; + auto uv_double_new=uv_double; + + ab_double_new[0]=double(new_ab[0]); + ab_double_new[1]=double(new_ab[1]); + uv_double_new[0][0]=double(new_u[0]); + uv_double_new[0][1]=double(new_v[0]); + uv_double_new[1][0]=double(new_u[1]); + uv_double_new[1][1]=double(new_v[1]); + + if ( + int64(ab_double_new[0])!=new_ab[0] || + int64(ab_double_new[1])!=new_ab[1] || + int64(uv_double_new[0][0])!=new_u[0] || + int64(uv_double_new[0][1])!=new_v[0] || + int64(uv_double_new[1][0])!=new_u[1] || + int64(uv_double_new[1][1])!=new_v[1] + ) { + break; + } 
+ + ab=new_ab; + u=new_u; + v=new_v; + + ab_double=ab_double_new; + uv_double=uv_double_new; + + //print( " gcd_base_2 quotient", q ); + + res.push_back(q); + ++num_quotients; + + //todo break; + } + + return num_quotients!=0; +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gcd_base_divide_table.h b/lib/chiavdf/fast_vdf/gcd_base_divide_table.h new file mode 100644 index 00000000..2be950ba --- /dev/null +++ b/lib/chiavdf/fast_vdf/gcd_base_divide_table.h @@ -0,0 +1,232 @@ +const uint64 data_mask=bit_sequence(0, data_size); +const int carry_size=64-data_size; +const uint64 carry_mask=bit_sequence(data_size, carry_size); + +namespace simd_integer_namespace { + + +int64 abs_int(int64 v) { + return (v<0)? -v : v; +} + +int divide_table_stats_calls=0; +int divide_table_stats_table=0; + +//generic_stats gcd_64_num_iterations; + +//used for both gcd and reduce +int64 divide_table_lookup(int64 index) { + assert(index>=0 && index<=bit_sequence(0, divide_table_index_bits)); + + uint128 res = (~uint128(0)) / uint128(max(uint64(index), uint64(1))); + res>>=64; + + return res; +} + +int64 divide_table_64(int64 a, int64 b, int64& q) { + assert(b>0); + + q=a/b; + int64 r=a%b; + + if (r<0) { + r+=b; + --q; + } + + assert(r>=0 && r0); + + //b_shift=(64-divide_table_index_bits) - lzcnt(b) + //bsr(b)=63-lzcnt(b) + //63-bsr(b)=lzcnt(b) + //b_shift=(64-divide_table_index_bits) - 63-bsr(b) + //b_shift=64-divide_table_index_bits - 63 + bsr(b) + //b_shift=1-divide_table_index_bits + bsr(b) + //b_shift=bsr(b) - (divide_table_index_bits-1) + + int b_shift = (64-divide_table_index_bits) - __builtin_clzll(b); + if (b_shift<0) { //almost never happens + b_shift=0; + } + + int64 b_approx = b >> b_shift; + int64 b_approx_inverse = divide_table_lookup(b_approx); + + q = (int128(a)*int128(b_approx_inverse)) >> 64; //high part of product + q >>= b_shift; + + int128 qb_128=int128(q)*int128(b); + int64 qb_64=int64(qb_128); + + int128 r_128=int128(a)-int128(qb_64); + int64 
r_64=int64(r_128); + + //int128 r=int128(a)-int128(q)*int128(b); + //if (uint128(r)>=b) { + + bool invalid_1=(int128(qb_64)!=qb_128 || int128(r_64)!=r_128 || uint64(r_64)>=b); + + int128 r_2=int128(a)-int128(q)*int128(b); + + bool invalid_2=(uint128(r_2)>=b); + + assert(invalid_1==invalid_2); + if (!invalid_2) { + assert(r_64==int64(r_2)); + } + + int64 r=r_2; + if (invalid_2) { + r=divide_table_64(a, b, q); + } else { + ++divide_table_stats_table; + } + + int64 q_expected; + int64 r_expected=divide_table_64(a, b, q_expected); + + assert(q==q_expected); + assert(r==r_expected); + + //if (test_asm_funcs) { + //int64 q_asm; + //int64 r_asm=divide_table_asm(a, b, q_asm); + + //assert(q_asm==q_expected); + //assert(r_asm==r_expected); + //} + + return r; +} + +void gcd_64( + array start_a, pair, array>& res, int& num_iterations, bool approximate, int max_iterations +) { + const bool test_asm_funcs=false; + + array uv={1, 0, 0, 1}; + array a=start_a; + + num_iterations=0; + + if (approximate && (start_a[0]==start_a[1] || start_a[1]==0)) { + res=make_pair(uv, a); + return; + } + + int asm_num_iterations=0; + array uv_asm=uv; + array a_asm=a; + + while (true) { + if (test_asm_funcs) { + //if (gcd_64_iteration_asm(a_asm, uv_asm, approximate)) { + //++asm_num_iterations; + //} + } + + if (a[1]==0) { + break; + } + + assert(a[0]>a[1] && a[1]>0); + + int64 q; + int64 r=divide_table(a[0], a[1], q); + { + int shift_amount=63-gcd_num_quotient_bits; + if ((q<>shift_amount!=q) { + break; + } + } + + + array new_a={a[1], r}; + + array new_uv; + for (int x=0;x<2;++x) { + new_uv[0*2+x]=uv[1*2+x]; + new_uv[1*2+x]=uv[0*2+x] - q*uv[1*2+x]; + } + + bool valid=true; + + if (approximate) { + assert(new_uv[1*2+0]!=0); + bool is_even=(new_uv[1*2+0]<0); + + bool valid_exact; + if (is_even) { + valid_exact=(new_a[1]>=-new_uv[1*2+0] && new_a[0]-new_a[1]>=new_uv[1*2+1]-new_uv[0*2+1]); + } else { + valid_exact=(new_a[1]>=-new_uv[1*2+1] && new_a[0]-new_a[1]>=new_uv[1*2+0]-new_uv[0*2+0]); + } + + 
//valid=valid_exact; + valid= + (new_a[1]>=-new_uv[1*2+0] && new_a[0]-new_a[1]>=new_uv[1*2+1]-new_uv[0*2+1]) && + (new_a[1]>=-new_uv[1*2+1] && new_a[0]-new_a[1]>=new_uv[1*2+0]-new_uv[0*2+0]) + ; + + assert(valid==valid_exact); + + if (valid) { + assert(valid_exact); + } + } + + //have to do this even if approximate is false + for (int x=0;x<4;++x) { + if (abs_int(new_uv[x])>data_mask) { + valid=false; + } + } + + if (!valid) { + break; + } + + uv=new_uv; + a=new_a; + ++num_iterations; + + if (test_asm_funcs) { + assert(uv==uv_asm); + assert(a==a_asm); + assert(num_iterations==asm_num_iterations); + } + + if (num_iterations>=max_iterations) { + break; + } + } + + //gcd_64_num_iterations.add(num_iterations); + + for (int x=0;x<4;++x) { + assert(abs_int(uv[x])<=data_mask); + } + + if (test_asm_funcs) { + assert(uv==uv_asm); + //assert(a==a_asm); the asm code will update a even if it becomes invalid; fine since it's not used + assert(num_iterations==asm_num_iterations); + } + + res=make_pair(uv, a); +} + + +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gcd_unsigned.h b/lib/chiavdf/fast_vdf/gcd_unsigned.h new file mode 100644 index 00000000..6e71d6fc --- /dev/null +++ b/lib/chiavdf/fast_vdf/gcd_unsigned.h @@ -0,0 +1,345 @@ +//threshold is 0 to calculate the normal gcd +template void gcd_unsigned_slow( + array, 2>& ab, + array, 2>& uv, + int& parity, + fixed_integer threshold=fixed_integer(integer(0)) +) { + assert(ab[0]>threshold); + + while (ab[1]>threshold) { + fixed_integer q(ab[0]/ab[1]); + fixed_integer r(ab[0]%ab[1]); + + ab[0]=ab[1]; + ab[1]=r; + + //this is the absolute value of the cofactor matrix + auto u1_new=uv[0] + q*uv[1]; + uv[0]=uv[1]; + uv[1]=u1_new; + + parity=-parity; + } +} + +//todo +//test this by making two numbers that have a specified quotient sequence. 
can add big quotients then +//to generate numbers with a certain quotient sequence: +//euclidean algorithm: q=a/b ; a'=b ; b'=a-q*b ; terminates when b'=0 +//initially b'=0 and all qs are known +//first iteration: b'=a-q*b=0 ; a=q*b ; select some b and this will determine a +//next: b'=a-q*b ; a'=b ; b'=a-q*a' ; b'+q*a'=a + +//uv is <1,0> to calculate |u| and <0,1> to calculate |v| +//parity is negated for each quotient +template void gcd_unsigned( + array, 2>& ab, + array, 2>& uv, + int& parity, + fixed_integer threshold=fixed_integer(integer(0)) +) { + typedef fixed_integer int_t; + + static int test_asm_counter=0; + ++test_asm_counter; + + bool test_asm_run=true; + bool test_asm_print=(test_asm_counter%1000==0); + bool debug_output=false; + + assert(ab[0]>=ab[1] && !ab[1].is_negative()); + assert(!ab[0].is_negative() && !ab[1].is_negative()); + assert(!uv[0].is_negative() && !uv[1].is_negative()); + + auto ab_start=ab; + auto uv_start=uv; + int parity_start=parity; + int a_num_bits_old=-1; + + int iter=0; + + vector, 2>> matricies; + vector local_parities; + bool valid=true; + + while (true) { + assert(ab[0]>=ab[1] && !ab[1].is_negative()); + + if (debug_output) { + print( "" ); + print( "" ); + print( "====================================" ); + + for (int x=0;x ab_head={ + uint128(ab[0].window(shift_amount)) | (uint128(ab[0].window(shift_amount+64))<<64), + uint128(ab[1].window(shift_amount)) | (uint128(ab[1].window(shift_amount+64))<<64) + }; + //assert((ab_head[0]>>127)==0); + //assert((ab_head[1]>>127)==0); + + uint128 threshold_head=uint128(threshold.window(shift_amount)) | (uint128(threshold.window(shift_amount+64))<<64); + //assert((threshold_head>>127)==0); + + //don't actually need to do this + //it will compare threshold_head with > so it will already exit if they are equal + //if (shift_amount!=0) { + // ++threshold_head; + //} + + if (debug_output) print( "a_num_bits:", a_num_bits ); + if (debug_output) print( "a last index:", (a_num_bits+63/64)-1 ); 
+ if (debug_output) print( "shift_amount:", shift_amount ); + if (debug_output) print( "ab_head[0]:", uint64(ab_head[0]), uint64(ab_head[0]>>64) ); + if (debug_output) print( "ab_head[1]:", uint64(ab_head[1]), uint64(ab_head[1]>>64) ); + if (debug_output) print( "threshold_head:", uint64(threshold_head), uint64(threshold_head>>64) ); + + array, 2> uv_uint64; + int local_parity; //1 if odd, 0 if even + if (gcd_128(ab_head, uv_uint64, local_parity, shift_amount!=0, threshold_head)) { + //int local_parity=(uv_double[1][1]<0)? 1 : 0; //sign bit + bool even=(local_parity==0); + + if (debug_output) print( "u:", uv_uint64[0][0], uv_uint64[1][0] ); + if (debug_output) print( "v:", uv_uint64[0][1], uv_uint64[1][1] ); + if (debug_output) print( "local parity:", local_parity ); + + uint64 uv_00=uv_uint64[0][0]; + uint64 uv_01=uv_uint64[0][1]; + uint64 uv_10=uv_uint64[1][0]; + uint64 uv_11=uv_uint64[1][1]; + + //can use a_num_bits to make these smaller. this is at most a 2x speedup for these mutliplications which probably doesn't matter + //can do this with an unsigned subtraction and just swap the pointers + // + //this is an unsigned subtraction with the input pointers swapped to make the result nonnegative + // + //this uses mulx/adox/adcx if available for the multiplication + //will unroll the multiplication loop but early-exit based on the number of limbs in a (calculated before). this gives each + //branch its own branch predictor entry. each branch is at a multiple of 4 limbs. don't need to pad a + int_t a_new_1=ab[0]; a_new_1*=uv_00; a_new_1.set_negative(!even); + int_t a_new_2=ab[1]; a_new_2*=uv_01; a_new_2.set_negative(even); + int_t b_new_1=ab[0]; b_new_1*=uv_10; b_new_1.set_negative(even); + int_t b_new_2=ab[1]; b_new_2*=uv_11; b_new_2.set_negative(!even); + + //both of these are subtractions; the signs determine the direction. 
the result is nonnegative + int_t a_new; + int_t b_new; + if (!even) { + a_new=int_t(a_new_2 + a_new_1); + b_new=int_t(b_new_1 + b_new_2); + } else { + a_new=int_t(a_new_1 + a_new_2); + b_new=int_t(b_new_2 + b_new_1); + } + + //this allows the add function to be optimized + assert(!a_new.is_negative()); + assert(!b_new.is_negative()); + + //do not do any of this stuff; instead return an array of matricies + //the array is processed while it is being generated so it is cache line aligned, has a counter, etc + + ab[0]=a_new; + ab[1]=b_new; + + //bx and by are nonnegative + auto dot=[&](uint64 ax, uint64 ay, int_t bx, int_t by) -> int_t { + bx*=ax; + by*=ay; + return int_t(bx+by); + }; + + int_t new_uv_0=dot(uv_00, uv_01, uv[0], uv[1]); + int_t new_uv_1=dot(uv_10, uv_11, uv[0], uv[1]); + + uv[0]=new_uv_0; + uv[1]=new_uv_1; + + //local_parity is 0 even, 1 odd + //want 1 even, -1 odd + //todo: don't do this; just make it 0 even, 1 odd + parity*=1-local_parity-local_parity; + + matricies.push_back(uv_uint64); + local_parities.push_back(local_parity); + } else { + //can just make the gcd fail if this happens in the asm code + print( " gcd_unsigned slow" ); + //todo assert(false); //very unlikely to happen if there are no bugs + + valid=false; + break; + + /*had_slow=true; + + fixed_integer q(ab[0]/ab[1]); + fixed_integer r(ab[0]%ab[1]); + + ab[0]=ab[1]; + ab[1]=r; + + //this is the absolute value of the cofactor matrix + auto u1_new=uv[0] + q*uv[1]; + uv[0]=uv[1]; + uv[1]=u1_new; + + parity=-parity;*/ + } + + ++iter; + } + + { + auto ab2=ab_start; + auto uv2=uv_start; + int parity2=parity_start; + gcd_unsigned_slow(ab2, uv2, parity2, threshold); + + if (valid) { + assert(integer(ab[0]) == integer(ab2[0])); + assert(integer(ab[1]) == integer(ab2[1])); + assert(integer(uv[0]) == integer(uv2[0])); + assert(integer(uv[1]) == integer(uv2[1])); + assert(parity==parity2); + } else { + ab=ab2; + uv=uv2; + parity=parity2; + } + } + + #ifdef TEST_ASM + if (test_asm_run) { + if 
(test_asm_print) { + print( "test asm gcd_unsigned", test_asm_counter ); + } + + asm_code::asm_func_gcd_unsigned_data asm_data; + + const int asm_size=gcd_size; + const int asm_max_iter=gcd_max_iterations; + + assert(size>=1 && size<=asm_size); + + fixed_integer asm_a(ab_start[0]); + fixed_integer asm_b(ab_start[1]); + fixed_integer asm_a_2; + fixed_integer asm_b_2; + fixed_integer asm_threshold(threshold); + + uint64 asm_uv_counter_start=1234; + uint64 asm_uv_counter=asm_uv_counter_start; + + array, asm_max_iter+1> asm_uv; + + asm_data.a=&asm_a[0]; + asm_data.b=&asm_b[0]; + asm_data.a_2=&asm_a_2[0]; + asm_data.b_2=&asm_b_2[0]; + asm_data.threshold=&asm_threshold[0]; + + asm_data.uv_counter_start=asm_uv_counter_start; + asm_data.out_uv_counter_addr=&asm_uv_counter; + asm_data.out_uv_addr=(uint64*)&asm_uv[1]; + asm_data.iter=-2; //uninitialized + asm_data.a_end_index=size-1; + + int error_code=asm_code::asm_func_gcd_unsigned(&asm_data); + + auto asm_get_uv=[&](int i) { + array, 2> res; + res[0][0]=asm_uv[i+1][0]; + res[1][0]=asm_uv[i+1][1]; + res[0][1]=asm_uv[i+1][2]; + res[1][1]=asm_uv[i+1][3]; + return res; + }; + + auto asm_get_parity=[&](int i) { + uint64 r=asm_uv[i+1][4]; + assert(r==0 || r==1); + return bool(r); + }; + + auto asm_get_exit_flag=[&](int i) { + uint64 r=asm_uv[i+1][5]; + assert(r==0 || r==1); + return bool(r); + }; + + if (error_code==0) { + assert(valid); + + assert(asm_data.iter>=0 && asm_data.iter<=asm_max_iter); //total number of iterations performed + bool is_even=((asm_data.iter-1)&1)==0; //parity of last iteration (can be -1) + + fixed_integer& asm_a_res=(is_even)? asm_a_2 : asm_a; + fixed_integer& asm_b_res=(is_even)? 
asm_b_2 : asm_b; + + assert(integer(asm_a_res) == integer(ab[0])); + assert(integer(asm_b_res) == integer(ab[1])); + + for (int x=0;x<=matricies.size();++x) { + assert( asm_get_exit_flag(x-1) == (x==matricies.size()) ); + + if (x!=matricies.size()) { + assert(asm_get_parity(x)==local_parities[x]); + assert(asm_get_uv(x)==matricies[x]); + } + } + + assert(matricies.size()==asm_data.iter); + assert(asm_uv_counter==asm_uv_counter_start+asm_data.iter-1); //the last iteration that updated the counter is iter-1 + } else { + if (!valid) { + print( "test asm gcd_unsigned error", error_code ); + } + } + } + #endif + + assert(integer(ab[0])>integer(threshold)); + assert(integer(ab[1])<=integer(threshold)); +} diff --git a/lib/chiavdf/fast_vdf/generic.h b/lib/chiavdf/fast_vdf/generic.h new file mode 100644 index 00000000..0349bbd7 --- /dev/null +++ b/lib/chiavdf/fast_vdf/generic.h @@ -0,0 +1,252 @@ +#include "generic_macros.h" +#include + +#ifndef ILYA_SHARED_HEADER_GENERIC +#define ILYA_SHARED_HEADER_GENERIC + +namespace generic { +using namespace std; + + +template void print_impl(ostream& out, const type_a& a) {} + +template void print_impl(ostream& out, const char* a, const type_b& b) { + out << " " << b; +} + +template void print_impl(ostream& out, const type_a& a, const type_b& b) { + out << ", " << b; +} + +template void print_impl(ostream& out, const type_a& a, const type_b& b, const types&... targs) { + print_impl(out, a, b); + print_impl(out, b, targs...); +} + +template void print_to(ostream& out, const type_a& a, const types&... targs) { + out << a; + print_impl(out, a, targs...); + out << "\n"; +} + +template void print(const type_a& a, const types&... targs) { + print_to(cerr, a, targs...); +} + +//if buffer is not null, will return an empty string +string getstream(istream& targ, int block_size=10, string* buffer=nullptr) { + string new_buffer; + string& res=(buffer!=nullptr)? 
*buffer : new_buffer; + res.clear(); + + while(1) { + res.resize(res.size()+block_size); + targ.read(&(res[res.size()-block_size]), block_size); + int c=targ.gcount(); + if (c!=block_size) { + res.resize(res.size()-block_size+c); + assert(targ.eof()); + return new_buffer; + } + } +} + +string getfile(const string& name, bool binary=0, int block_size=1024) { + ifstream in(name, binary? ios::binary|ios_base::in : ios_base::in); + assert(in.good()); + return getstream(in, block_size); +} + +struct less_ptr { + template bool operator()(ptr_type a, ptr_type b) { + return *a<*b; + } +}; + +template type instance_of(); + +template std::string to_string(std::ostringstream& s, const type& targ) { + s.clear(); + s.str(""); + s << targ; + return s.str(); +} +template std::string to_string(const type& targ) { + static std::ostringstream s; + return to_string(s, targ); +} + +template pair checked_from_string(std::istringstream& s, const std::string& targ) { + s.clear(); + s.str(targ); + type res; + s >> res; + return make_pair(res, s.eof() && !s.fail()); +} +template type from_string(std::istringstream& s, const std::string& targ) { + return checked_from_string(s, targ).first; +} +template type from_string(const std::string& targ) { + static std::istringstream s; + return from_string(s, targ); +} +template pair checked_from_string(const std::string& targ) { + static std::istringstream s; + return checked_from_string(s, targ); +} + +template type assert_from_string(const std::string& targ) { + auto res=checked_from_string(targ); + assert(res.second); + return res.first; +} + +template unique_ptr make_unique_ptr(types&&... 
targs) { + return unique_ptr(new type(forward(targs)...)); +} + +template int array_size(type(&)[size]) { + return size; +} + +template std::ostream& print_as_number(std::ostream& out, const type& targ) { out << targ; return out; } +template<> std::ostream& print_as_number(std::ostream& out, const unsigned char& targ) { out << int(targ); return out; } +template<> std::ostream& print_as_number(std::ostream& out, const signed char& targ) { out << int(targ); return out; } +template<> std::ostream& print_as_number(std::ostream& out, const char& targ) { out << int(targ); return out; } + +// + +template struct only_if {}; +template struct only_if<1, type> { typedef type good; }; +template typename only_if::good assert_true() {} + +template struct only_if_same_types {}; +template struct only_if_same_types { typedef type good; }; +template typename only_if_same_types::good assert_same_types() {} + +template struct only_if_not_same_types { typedef type good; }; +template struct only_if_not_same_types {}; +template typename only_if_not_same_types::good assert_not_same_types() {} + +template struct static_abs { static const int res=n<0? -n : n; }; +template struct static_sgn { static const int res=n<0? -1 : (n>0? 1 : 0); }; +template struct static_max { static const int res=a>b? 
a : b; }; +template struct static_min { static const int res=a class wrap_type { typedef type res; }; + +// + +template class union_pair { + template friend class union_pair; + static const size_t size_bytes=static_max::res; + static const size_t alignment_bytes=static_max::res; + typename aligned_storage::type buffer; + bool t_is_first; + + public: + union_pair() : t_is_first(1) { new(&buffer) type_a(); } + union_pair(int, int) : t_is_first(0) { new(&buffer) type_b(); } + union_pair(const type_a& targ) : t_is_first(1) { new(&buffer) type_a(targ); } + union_pair(const type_b& targ) : t_is_first(0) { new(&buffer) type_b(targ); } + union_pair(type_a&& targ) : t_is_first(1) { new(&buffer) type_a(move(targ)); } + union_pair(type_b&& targ) : t_is_first(0) { new(&buffer) type_b(move(targ)); } + union_pair(const union_pair& targ) : t_is_first(targ.t_is_first) { + if (t_is_first) new(&buffer) type_a(targ.first()); else new(&buffer) type_b(targ.second()); + } + union_pair(const union_pair& targ) : t_is_first(!targ.t_is_first) { + if (t_is_first) new(&buffer) type_a(targ.second()); else new(&buffer) type_b(targ.first()); + } + union_pair(union_pair&& targ) : t_is_first(targ.t_is_first) { + if (t_is_first) new(&buffer) type_a(move(targ.first())); else new(&buffer) type_b(move(targ.second())); + } + union_pair(union_pair&& targ) : t_is_first(!targ.t_is_first) { + if (t_is_first) new(&buffer) type_a(move(targ.second())); else new(&buffer) type_b(move(targ.first())); + } + + union_pair& operator=(const type_a& targ) { + if (is_first()) first()=targ; else set_first(targ); + return *this; + } + union_pair& operator=(const type_b& targ) { + if (is_second()) second()=targ; else set_second(targ); + return *this; + } + + union_pair& operator=(type_a&& targ) { + if (is_first()) first()=move(targ); else set_first(move(targ)); + return *this; + } + union_pair& operator=(type_b&& targ) { + if (is_second()) second()=move(targ); else set_second(move(targ)); + return *this; + } + + 
union_pair& operator=(const union_pair& targ) { + if (targ.is_first()) { + return *this=targ.first(); + } else { + return *this=targ.second(); + } + } + union_pair& operator=(const union_pair& targ) { + if (targ.is_first()) { + return *this=targ.first(); + } else { + return *this=targ.second(); + } + } + + union_pair& operator=(union_pair&& targ) { + if (targ.is_first()) { + return *this=move(targ.first()); + } else { + return *this=move(targ.second()); + } + } + union_pair& operator=(union_pair&& targ) { + if (targ.is_first()) { + return *this=move(targ.first()); + } else { + return *this=move(targ.second()); + } + } + + typedef type_a first_type; + typedef type_b second_type; + + bool is_first() const { return t_is_first; } + bool is_second() const { return !t_is_first; } + // + type_a& first() { return *reinterpret_cast(&buffer); } + const type_a& first() const { return *reinterpret_cast(&buffer); } + type_b& second() { return *reinterpret_cast(&buffer); } + const type_b& second() const { return *reinterpret_cast(&buffer); } + // + template type_a& set_first(types&&... targs) { + if (!t_is_first) { + second().~type_b(); + t_is_first=1; + } else { + first().~type_a(); + } + return *(new(&buffer) type_a(forward(targs)...)); + } + template type_b& set_second(types&&... targs) { + if (t_is_first) { + first().~type_a(); + t_is_first=0; + } else { + second().~type_b(); + } + return *(new(&buffer) type_b(forward(targs)...)); + } + + ~union_pair() { + if (t_is_first) first().~type_a(); else second().~type_b(); + } +}; + +} + +#endif diff --git a/lib/chiavdf/fast_vdf/generic_macros.h b/lib/chiavdf/fast_vdf/generic_macros.h new file mode 100644 index 00000000..da5842ae --- /dev/null +++ b/lib/chiavdf/fast_vdf/generic_macros.h @@ -0,0 +1,34 @@ +#ifndef ILYA_SHARED_HEADER_GENERIC_MACROS +#define ILYA_SHARED_HEADER_GENERIC_MACROS + +/* +#define main(...) 
\ + main_inner(int argc, char** argv); \ + int main(int argc, char** argv) { \ + try {\ + return main_inner(argc, argv);\ + } catch(const std::exception& e) {\ + std::cerr << "\n\nUncaught exception: " << e.what() << "\n";\ + char *f=0; *f=1;\ + } catch(const std::string& e) {\ + std::cerr << "\n\nUncaught exception: " << e << "\n";\ + char *f=0; *f=1;\ + } catch(const char* e) {\ + std::cerr << "\n\nUncaught exception: " << e << "\n";\ + char *f=0; *f=1;\ + } catch(...) {\ + std::cerr << "\n\nUncaught exception.\n";\ + char *f=0; *f=1;\ + }\ + }\ + int main_inner(int argc, char** argv) + +#ifndef NO_GENERIC_H_ASSERT + #ifdef assert + #undef assert + #endif + #define assert(v) if (!(v)) { std::cerr << "\n\nAssertion failed: " << __FILE__ << " : " << __LINE__ << "\n"; char* shared_generic_assert_char_123=nullptr; *shared_generic_assert_char_123=1; throw 0; } (void)0 +#endif +*/ + +#endif diff --git a/lib/chiavdf/fast_vdf/gpu_integer.h b/lib/chiavdf/fast_vdf/gpu_integer.h new file mode 100644 index 00000000..0840066d --- /dev/null +++ b/lib/chiavdf/fast_vdf/gpu_integer.h @@ -0,0 +1,639 @@ +template int_type add_carry(int_type a, int_type b, int carry_in, int& carry_out) { + assert(carry_in==0 || carry_in==1); + uint128 res=uint128(a) + uint128(b) + uint128(carry_in); + + carry_out=int(res >> (sizeof(int_type)*8)); + assert(carry_out==0 || carry_out==1); + + return int_type(res); +} + +template int_type sub_carry(int_type a, int_type b, int carry_in, int& carry_out) { + assert(carry_in==0 || carry_in==1); + uint128 res=uint128(a) - uint128(b) - uint128(carry_in); + + carry_out=int(res >> (sizeof(int_type)*8)) & 1; + assert(carry_out==0 || carry_out==1); + + return int_type(res); +} + +template int clz(int_type a) { + assert(sizeof(int_type)==4 || sizeof(int_type)==8); + + if (a==0) { + return (sizeof(int_type)==4)? 32 : 64; + } else { + return (sizeof(int_type)==4)? 
__builtin_clz(uint32(a)) : __builtin_clzll(uint64(a)); + } +} + +uint64 mul_high(uint64 a, uint64 b) { + return uint64((uint128(a)*uint128(b))>>64); +} + +uint32 mul_high(uint32 a, uint32 b) { + return uint32((uint64(a)*uint64(b))>>32); +} + +constexpr int max_constexpr(int a, int b) { + if (a>b) { + return a; + } else { + return b; + } +} + +//all "=" operators truncate ; all operators that return a separate result will pad the result as necessary +template struct fixed_integer { + static const type positive_sign=0; + static const type negative_sign=~type(0); + + type data[size+1]; //little endian; sign is first + + fixed_integer() { + for (int x=0;x explicit fixed_integer(fixed_integer t) { + for (int x=0;x fixed_integer& operator=(fixed_integer t) { return *this=fixed_integer(t); } + + bool is_negative() const { + return !is_zero() && data[0]==negative_sign; + } + + void set_negative(bool t_negative) { + data[0]=(t_negative)? negative_sign : positive_sign; + } + + type& operator[](int pos) { + assert(pos>=0 && pos=0 && posb + //there is also a fast comparison in the add function, but it has a slow path + static int compare( + const type* a, int size_a, type sign_a, + const type* b, int size_b, type sign_b + ) { + int carry=0; + type zero=0; + + //this calculates |a|-|b|. all of the resulted are or'ed together in zero + for (int x=0;x|b| (|a|-|b| is positive) + + //same sign, positive: use res + //same sign, negative: negate res + //opposite signs: use res if 0, otherwise 1 if sign_a is positive, -1 if sign_a is negative + + int res=0; + if (zero!=0) res=1; + if (carry==1) res=-1; + + //todo //get rid of branches + //this is used to implement exactly one comparison with a binary result, so that should get rid of all of these branches + if (sign_a==sign_b) { + if (sign_a==negative_sign) { + res=-res; + } + } else { + if (res!=0) { + res=(sign_a==negative_sign)? 
-1 : 1; + } + } + + return res; + } + + template int compare(fixed_integer b) const { + return compare( + data+1, size, data[0], + b.data+1, size, b.data[0] + ); + } + + //a, b, and res can alias with each other but only if the pointers are equal + //the sign is not present in a/b/res + static void add( + const type* a, int size_a, type sign_a, + const type* b, int size_b, type sign_b, + type* res, int size_res, type& sign_res + ) { + if (size_b>size_a) { + swap(a, b); + swap(size_a, size_b); + swap(sign_a, sign_b); + } + + assert(size_res>=size_a && size_a>=size_b && size_b>=1); + + type mask=sign_a ^ sign_b; //all 1s if opposite signs, else all 0s. this isn't affected by swapping + + type swap_mask=positive_sign; + + if (size_a==size_b) { + //carry flag + int size_ab=size_a; + bool a_less_than_b=a[size_ab-1]=2) { + a_less_than_b=a[size_ab-2] + fixed_integer operator+( + fixed_integer b + ) const { + const int output_size=max_constexpr(size, b_size)+1; + + fixed_integer res; + + add( + data+1, size, data[0], + b.data+1, b_size, b.data[0], + res.data+1, output_size, res.data[0] + ); + + return res; + } + + template + fixed_integer operator-( + fixed_integer b + ) const { + const int output_size=max_constexpr(size, b_size)+1; + + fixed_integer res; + + add( + data+1, size, data[0], + b.data+1, b_size, negative_sign^b.data[0], + res.data+1, output_size, res.data[0] + ); + + return res; + } + + //res=a*b+c + //res can alias with c if the pointers are equal. can't alias with a + //if c is null then it is all 0s + static void mad( + const type* a, int size_a, + type b, + const type* c, int size_c, + type* res, int size_res + ) { + assert(size_res>=size_c && size_c>=size_a && size_a>=1); + + type previous_high=0; + int carry_mul=0; + int add_mul=0; + + for (int x=0;x=size_a)? 
0 : a[x]; + + type this_low=this_a*b; + type this_high=mul_high(this_a, b); + + type mul_res=add_carry(this_low, previous_high, carry_mul, carry_mul); + + if (x==0) { + assert(mul_res==this_low && carry_mul==0); + } else + if (x==size_a) { + assert(carry_mul==0); + } else + if (x>size_a) { + assert(mul_res==0 && carry_mul==0); + } + + type this_c=(x>=size_c || c==nullptr)? 0 : c[x]; + type add_res=add_carry(mul_res, this_c, add_mul, add_mul); + + res[x]=add_res; + + previous_high=this_high; + } + } + + //can't overflow + //two of these can implement a 1024x512 mul. for 1024x1024, need to do 2x 1024x512 in separate buffers then add them + static void mad_8x8(array a, array b, array c, array& res) { + for (int x=0;x<8;++x) { + res[x]=c[x]; + } + for (int x=8;x<16;++x) { + res[x]=0; + } + + for (int x=0;x<8;++x) { + //this uses a sliding window for the 8 res registers (no spilling) + //-the lowest register is finished after the first addition in mad. the this_low,previous_high addition is skipped + //-the highest register does not need to be loaded until the last multiplication in mad. actually this would always load 0 + // so it is not done + //-the total number of registers is therefore 7 + //there is one register for b + //the 8 a values are in registers but some or all may be spilled + //need 2 registers to store the MULX result + //need 1 register to store the previous high result (this is initially 0) + //the this_low,previous_high add result goes into one of those registers + //the mul_res,this_c result goes into the c register + //total registers is 18 then; 2 are spilled + //address registers: + //-will just use a static 32-bit address space for most of the code. can store the stack pointer there then + //-address registers are only used for b and res if the addresses are not static + //-the addresses are only used at the end of the loop, so there are spare registers to load the address registers from static + // memory. 
probably the addresses will be static though + mad(&a[0], 8, b[x], &res[x], 8, &res[x], 8); + } + } + + void operator*=(type v) { + mad( + data+1, size, + v, + nullptr, size, + data+1, size + ); + } + + template + static fixed_integer subset( + fixed_integer this_v, int start + ) { + const int end=start+t_size; + + fixed_integer res; + res.data[0]=this_v.data[0]; + + for (int x=start;x=0 && pos=0;--x) { + int pos=x-amount; + (*this)[x] = (pos>=0 && pos=0 && pos>64" statement. might wrap around + return; + } + + const int bits_per_limb=sizeof(type)*8; + assert(amount>0 && amount=0;--x) { + type previous=(x==0)? 0 : (*this)[x-1]; + (*this)[x] = ((*this)[x]<>(bits_per_limb-amount)); + } + } + + void operator>>=(int amount) { + if (amount==0) { + return; + } + + const int bits_per_limb=sizeof(type)*8; + assert(amount>0 && amount>amount) | (next<<(bits_per_limb-amount)); + } + } + + template + fixed_integer operator*( + fixed_integer b + ) const { + const int output_size=size+b_size; + fixed_integer res; + + for (int x=0;x(*this, 0); + r.data[0]=positive_sign; + + integer b_x_int(vector{b[x]}); + + r*=b[x]; + //auto r2=subset(r, 0); + //r2*=b[x]; + //r=r2; + + integer r_int(r); + integer this_int(abs(*this)); + integer expected_r_int=this_int*b_x_int; + assert(r_int==expected_r_int); + + r.left_shift_limbs(x); + r_int<<=x*sizeof(type)*8; + assert(r_int==integer(r)); + + integer res_old_int(res); + + //todo //figure out why this doesn't work. might have something to do with the msb being set? 
+ res+=r; //unsigned + /*auto res3=res; + res3+=r; + + auto res2=res+r; + fixed_integer res4(res2);*/ + + /*if (integer(res3)!=integer(res4)) { + print( "========" ); + + res3=res; + res3+=r; + + //print( "========" ); + + auto res2_copy=res+r; + + assert(false); + }*/ + + //res=res4; + + integer res_new_int(res); + + assert(res_new_int==res_old_int+r_int); + } + + res.data[0]=data[0] ^ b.data[0]; + return res; + } + + fixed_integer operator<<(int num) const { + auto res=subset(*this, 0); + res<<=num; + return res; + } + + //this rounds to 0 so it is different from division unless the input is divisible by 2^num + fixed_integer operator>>(int num) const { + auto res=subset(*this, 0); + res>>=num; + return res; + } + + bool is_zero() const { + for (int x=0;x + bool operator>=(fixed_integer b) const { + return compare(b)>=0; + } + + template + bool operator==(fixed_integer b) const { + return compare(b)==0; + } + + template + bool operator<(fixed_integer b) const { + return compare(b)<0; + } + + template + bool operator<=(fixed_integer b) const { + return compare(b)<=0; + } + + template + bool operator>(fixed_integer b) const { + return compare(b)>0; + } + + template + bool operator!=(fixed_integer b) const { + return compare(b)!=0; + } + + //"0" has 1 bit + int num_bits() const { + type v=0; + int num_full=0; + + for (int x=size-1;x>=0;--x) { + if (v==0) { + v=(*this)[x]; + num_full=x; + } + } + + int v_bits; + if (v==0) { + v_bits=1; + assert(num_full==0); + } else + if (sizeof(v)==8) { + v_bits=64-__builtin_clzll(v); + } else{ + assert(sizeof(v)==4); + v_bits=32-__builtin_clz(v); + } + + return num_full*sizeof(type)*8 + v_bits; + } + + type window(int start_bit) const { + int bits_per_limb_log2=(sizeof(type)==8)? 6 : 5; + int bits_per_limb=1<>bits_per_limb_log2; + int start_offset=start_bit&(bits_per_limb-1); + + auto get_limb=[&](int pos) -> type { + assert(pos>=0); + return (pos>=size)? 
type(0) : (*this)[pos]; + }; + + type start=get_limb(start_limb)>>(start_offset); + + //the shift is undefined for start_offset==0 + type end=get_limb(start_limb+1)<<(bits_per_limb-start_offset); + + return (start_offset==0)? start : (start | end); + } +}; + +template fixed_integer abs(fixed_integer v) { + v.set_negative(false); + return v; +} + +template fixed_integer to_uint64(fixed_integer v) { + fixed_integer res; + res.set_negative(v.is_negative()); //sign extend data[0]. can just make data[0] 64 bits if i actually have to do this + + //this just copies the bytes over + for (int x=0;x>1]=uint64(high)<<32 | uint64(low); + } + + return res; +} + +template fixed_integer to_uint32(fixed_integer v) { + fixed_integer res; + res.set_negative(v.is_negative()); //lower 32 bits of data[0] + + for (int x=0;x>32); + } + + return res; +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gpu_integer_divide.h b/lib/chiavdf/fast_vdf/gpu_integer_divide.h new file mode 100644 index 00000000..bfd25e90 --- /dev/null +++ b/lib/chiavdf/fast_vdf/gpu_integer_divide.h @@ -0,0 +1,378 @@ +//unsigned +template void normalize_divisor(fixed_integer& b, int& shift_limbs, int& shift_bits) { + shift_limbs=0; + //todo //make this a variable shift (could have done it on the gpu through shared memory; oh well) + for (int x=0;x= the actual reciprocal; max result is 2^63 +uint64 calculate_reciprocal(uint32 high, uint32 low) { + assert((high>>31)!=0); //should be normalized + + //bit 63 set + uint64 both_source=uint64(low) | (uint64(high)<<32); + + uint64 both=both_source; + + //bit 52 set + both>>=2*32-53; + + //clears bit 52 + both&=~(1ull<<52); + + uint64 res; + + if (both<=1) { + res=1ull<<63; + } else { + --both; + + uint64 bits=both; + bits|=1023ull<<52; + + double bits_double=*(double*)&bits; + bits_double=1.0/(bits_double); + bits=*(uint64*)&bits_double; + + bits&=(1ull<<52)-1; + + res=bits; + ++res; + + res|=1ull<<52; + res<<=(62-52); + } + + return res; +} + +//result is >= 
the actual quotient +uint32 calculate_quotient(uint32 high, uint32 low, uint64 reciprocal, uint32 b) { + uint64 both=uint64(low) | (uint64(high)<<32); + + uint64 product_high=(uint128(both)*uint128(reciprocal))>>64; + ++product_high; + + uint64 res=product_high>>(32-2); + + if (res>=1ull<<32) { + res=(1ull<<32)-1; + } + + return uint32(res); +} + +fixed_integer calculate_reciprocal(uint64 high, uint64 low); +uint64 calculate_quotient(uint64 high, uint64 low, fixed_integer reciprocal, uint64 b); + +//should pad a by 1 limb then left shift it by num_bits +//all integers are unsigned +template +void divide_integers_impl( + fixed_integer a, fixed_integer b, int b_shift_limbs, + fixed_integer& q, fixed_integer& r +) { + const int max_quotient_size=size_a-1; + fixed_integer res; + + auto reciprocal=calculate_reciprocal(b[size_b-1], (size_b>=2)? b[size_b-2] : 0); + + fixed_integer b_shifted; + b_shifted=b; + b_shifted.left_shift_limbs(size_a-size_b-1); //it is already left shifted by b_shift_limbs + + int quotient_size=size_a-(size_b-b_shift_limbs); + + for (int x=0;x=quotient_size) { + break; + } + { + type qj=calculate_quotient(a[size_a-1-x], a[size_a-2-x], reciprocal, b[size_b-1]); + + //this is slower than using the doubles even though the doubles waste half the registers + //ptxas generates horrible code which isn't scheduled properly + //uint64 qj_64=((uint64(a[size_a-1-x])<<32) | uint64(a[size_a-2-x])) / uint64(b[size_b-1]); + //uint32 qj=uint32(min( qj_64, uint64(~uint32(0)) )); + + auto a_start=a; + type qj_start=qj; + + auto b_shifted_qj=b_shifted; + b_shifted_qj*=qj; + a-=b_shifted_qj; + + while (a.is_negative()) { + //todo print( "slow division" ); + + --qj; + a+=b_shifted; + } + + b_shifted.right_shift_limbs(1); + + res[max_quotient_size-1-x]=qj; + } + } + + //todo //get rid of this; use variable shifts + for (int x=0;x=max_quotient_size) { + break; + } + + res.right_shift_limbs(1); + ++quotient_size; + } + + q=res; + r=a; + + //todo print( "====" ); +} + 
+//these are signed +//this has a bug if size_a +void divide_integers( + fixed_integer a, fixed_integer b, + fixed_integer& q, fixed_integer& r +) { + int shift_limbs; + int shift_bits; + + auto b_normalized=b; + b_normalized.set_negative(false); + normalize_divisor(b_normalized, shift_limbs, shift_bits); + + fixed_integer a_shifted; + a_shifted=a; + a_shifted.set_negative(false); + + a_shifted<<=shift_bits; + + fixed_integer q_unsigned; + divide_integers_impl(a_shifted, b_normalized, shift_limbs, q_unsigned, r); + + r>>=shift_bits; + + if (a.is_negative()!=b.is_negative()) { + if (r==fixed_integer(integer(0u))) { + q=q_unsigned; + q=-q; + } else { + q=q_unsigned+fixed_integer(integer(1u)); + q=-q; //q'=-q-1 + + auto abs_b=b; + abs_b.set_negative(false); + r=abs_b-r; + } + } else { + q=q_unsigned; + } + + // qb+r=a ; b>0: 0<=r +fixed_integer operator/( + fixed_integer a, fixed_integer b +) { + fixed_integer q; + fixed_integer r; + divide_integers(a, b, q, r); + return q; +} + +template +fixed_integer operator%( + fixed_integer a, fixed_integer b +) { + fixed_integer q; + fixed_integer r; + + b.set_negative(false); + divide_integers(a, b, q, r); + return r; +} + +fixed_integer calculate_reciprocal(uint64 high, uint64 low) { + assert((high>>63)!=0); //normalized + + fixed_integer a; + + a[5]=1u<<31; // a=2^191 ; normalized + + fixed_integer b; + b[0]=uint32(low>>32); + b[1]=uint32(high); + b[2]=uint32(high>>32); + b-=fixed_integer(integer(1)); + + return fixed_integer(to_uint64(a/b + fixed_integer(integer(1)))<<31); +} + +//result is >= the actual reciprocal. 
it is approximately 2^127/((HIGH | LOW)/2^127) +//the max value is 2^127 + 2^31 +/*fixed_integer calculate_reciprocal(uint64 high, uint64 low) { + assert((high>>63)!=0); //normalized + + //fixed_integer a + //a[5]=1u<<31; // a=2^191 ; normalized + + uint128 b=(uint128(high)<<32) | uint128(low>>32); + + uint64 reciprocal=calculate_reciprocal(uint32(high>>32), uint32(high)); + + fixed_integer b_shifted; + b_shifted=b; + b_shifted.left_shift_limbs(2); + + int quotient_size=3; + + for (int x=0;x<3;++x) { + uint64 qj=calculate_quotient(a[5-x], a[4-x], reciprocal, b[1]); + + auto b_shifted_qj=b_shifted; + b_shifted_qj*=qj; + a-=b_shifted_qj; + + while (a.is_negative()) { + //todo print( "slow division" ); + + --qj; + a+=b_shifted; + } + + b_shifted.right_shift_limbs(1); + + res[5-1-x]=qj; + } + + todo //get rid of this; use variable shifts + for (int x=0;x=max_quotient_size) { + break; + } + + res.right_shift_limbs(1); + ++quotient_size; + } + + q=res; + r=a; + + //todo print( "====" ); + + return fixed_integer(to_uint64(a/b + fixed_integer(integer(1)))<<31); +} */ + +//result is >= the actual quotient +uint64 calculate_quotient(uint64 high, uint64 low, fixed_integer reciprocal, uint64 b) { + fixed_integer both; + both[0]=low; + both[1]=high; + + //approximately (high | low) * (2^127/((HIGH | LOW)/2^127)) + // = (2^(127*2)*(high | low)/((HIGH | LOW)/2^64)/2^64 + // = (2^(127*2-64) * (high | low)/((HIGH | LOW)/2^64) + // = (2^190 * (high | low)/((HIGH | LOW)/2^64) + //need to right shift by 190 then, which is 2*64+62 + // + //max value of the product is (2^128-1)*(2^127 + 2^31) = 2^255 + 2^159 - 2^127 - 2^31 + + integer both_int(both); + integer reciprocal_int(reciprocal); + integer product_both_int(both_int*reciprocal_int); + + fixed_integer product_both(both*reciprocal); + assert(integer(product_both)==product_both_int); + + product_both.right_shift_limbs(2); + + product_both_int>>=128; + assert(integer(product_both)==product_both_int); + + fixed_integer 
product_high(product_both); + + //this can't overflow because the max value of the product has e.g. bit 254 cleared + product_high+=fixed_integer(integer(1)); + + product_high>>=64-2; + + uint64 res; + if (product_high[1]!=0) { + res=~uint64(0); + } else { + res=product_high[0]; + } + + //uint128 qj_128=((uint128(high)<<64) | uint128(low)) / uint128(b); + //uint64 qj=uint64(min( qj_128, uint128(~uint64(0)) )); + + //assert(res>=qj); this is wrong. res can be qj-1 sometimes + //assert(res<=qj+1); //optional + + return res; +} + +/*template +fixed_integer operator/( + fixed_integer a, fixed_integer b +) { + auto a_32=to_uint32(a); + auto b_32=to_uint32(b); + fixed_integer q_32; + fixed_integer r_32; + + divide_integers(a_32, b_32, q_32, r_32); + return to_uint64(q_32); +} + +template +fixed_integer operator%( + fixed_integer a, fixed_integer b +) { + auto a_32=to_uint32(a); + auto b_32=to_uint32(b); + fixed_integer q_32; + fixed_integer r_32; + + b_32.set_negative(false); + divide_integers(a_32, b_32, q_32, r_32); + return to_uint64(r_32); +}**/ \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/gpu_integer_gcd.h b/lib/chiavdf/fast_vdf/gpu_integer_gcd.h new file mode 100644 index 00000000..0188bde1 --- /dev/null +++ b/lib/chiavdf/fast_vdf/gpu_integer_gcd.h @@ -0,0 +1,118 @@ +template struct fixed_gcd_res { + fixed_integer gcd; //unsigned; final value of a + fixed_integer gcd_2; //unsigned; final value of b. 
this is 0 for a normal gcd + fixed_integer s; //signed + fixed_integer t; //signed + fixed_integer s_2; //signed + fixed_integer t_2; //signed +}; + +//threshold is 0 to calculate the normal gcd +//this calculates either s (u) or t (v) +template fixed_gcd_res gcd( + fixed_integer a_signed, fixed_integer b_signed, fixed_integer threshold, + bool calculate_u +) { + assert(!threshold.is_negative()); + + bool a_negative=a_signed.is_negative(); + bool b_negative=b_signed.is_negative(); + assert(!b_negative); + + array, 2> ab; //unsigned + ab[0]=a_signed; + ab[0].set_negative(false); + + ab[1]=b_signed; + ab[1].set_negative(false); + + array, 2> uv; //unsigned + + int parity; + + if (ab[0] res; + res.gcd=ab[0]; + res.gcd_2=ab[1]; + + //if a was negative, negate the parity + //if the parity is -1, negate the parity and negate the result u/v values. the parity is now 1 + //for u, u0 is positive and u1 is negative + //for v, v0 is negative and u1 is positive + if (calculate_u) { + res.s=uv[0]; + res.s.set_negative(a_negative != (parity==-1)); + + res.s_2=uv[1]; + res.s_2.set_negative(a_negative != (parity==1)); + } else { + res.t=uv[0]; + res.t.set_negative(a_negative != (parity==1)); + + res.t_2=uv[1]; + res.t_2.set_negative(a_negative != (parity==-1)); + } + + if (threshold.is_zero()) { + auto expected_gcd_res=gcd(integer(a_signed), integer(b_signed)); + assert(expected_gcd_res.gcd==integer(res.gcd)); + + if (calculate_u) { + assert(expected_gcd_res.s==integer(res.s)); + } else { + assert(expected_gcd_res.t==integer(res.t)); + } + } else { + //integer a_copy(a_signed); + //integer b_copy(a_signed); + //integer u_copy; + //integer v_copy; + //xgcd_partial(u_copy, v_copy, a_copy, b_copy, integer(threshold)); + + //assert(a_copy==res.gcd); + //assert(b_copy==res.gcd_2); + + //if (calculate_t) { + //assert(u_copy==-res.t); + //assert(v_copy==-res.t_2); + //} + } + + return res; +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/include.h 
b/lib/chiavdf/fast_vdf/include.h new file mode 100644 index 00000000..f2508552 --- /dev/null +++ b/lib/chiavdf/fast_vdf/include.h @@ -0,0 +1,58 @@ +#ifdef NDEBUG + #undef NDEBUG +#endif + +#if VDF_MODE==0 + #define NDEBUG +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "generic.h" + +#include +#include + +using namespace std; +using namespace generic; + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; +typedef unsigned __int128 uint128; +typedef __int128 int128; + +#define todo +#define USED __attribute__((used)) diff --git a/lib/chiavdf/fast_vdf/install.sh b/lib/chiavdf/fast_vdf/install.sh new file mode 100755 index 00000000..e8615ce5 --- /dev/null +++ b/lib/chiavdf/fast_vdf/install.sh @@ -0,0 +1,2 @@ +#!/bin/bash +./install_child.sh 2>&1 diff --git a/lib/chiavdf/fast_vdf/install_child.sh b/lib/chiavdf/fast_vdf/install_child.sh new file mode 100755 index 00000000..551170f2 --- /dev/null +++ b/lib/chiavdf/fast_vdf/install_child.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -v + +cat /proc/cpuinfo | grep -e MHz -e GHz +cat /proc/cpuinfo | grep flags | head -n 1 +enable_all_instructions=0 +if cat /proc/cpuinfo | grep -w avx2 | grep -w fma | grep -w -q adx; then + enable_all_instructions=1 +fi +echo "enable_all_instructions: $enable_all_instructions" + +sudo apt-get install libgmp3-dev -y +sudo apt-get install libflint-dev -y + +compile_flags="-std=c++1z -D CHIAOSX=1 -D VDF_MODE=0 -D ENABLE_ALL_INSTRUCTIONS=$enable_all_instructions -no-pie -march=native" +link_flags="-no-pie -lgmpxx -lgmp -lflint -lpthread" + +g++ -o compile_asm.o -c compile_asm.cpp $compile_flags -O0 +g++ -o compile_asm 
compile_asm.o $link_flags +./compile_asm +as -o asm_compiled.o asm_compiled.s +g++ -o vdf.o -c vdf.cpp $compile_flags -O3 +g++ -o vdf vdf.o asm_compiled.o $link_flags \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/integer.h b/lib/chiavdf/fast_vdf/integer.h new file mode 100644 index 00000000..2c0da83b --- /dev/null +++ b/lib/chiavdf/fast_vdf/integer.h @@ -0,0 +1,476 @@ +//note: gmp already has c++ bindings so could have just used those. oh well + +//const bool output_stats=false; +/*struct generic_stats { + vector entries; + + void add(int i) { + if (!output_stats) { + return; + } + + entries.push_back(i); + } + + void output(string name) { + if (!output_stats) { + return; + } + + sort(entries.begin(), entries.end()); + vector percentiles={0, 0.01, 0.1, 1, 10, 25, 50, 75, 90, 99, 99.9, 99.99, 100}; + + print( "::", name ); + print( " num =", entries.size() ); + + if (entries.empty()) { + return; + } + + for (double c : percentiles) { + int i=(c/100)*entries.size(); + if (i<0) { + i=0; + } + if (i>=entries.size()) { + i=entries.size()-1; + } + + print( " ", c, " -> ", entries.at(i) ); + } + + double total=0; + for (int c : entries) { + total+=c; + } + + print( " ", "avg", " -> ", total/double(entries.size()) ); + } +};*/ + +/*struct track_cycles { + generic_stats& stats; + uint64 start_time; + bool is_aborted=false; + + track_cycles(generic_stats& t_stats) : stats(t_stats) { + if (!enable_track_cycles) { + return; + } + + start_time=__rdtsc(); + } + + void abort() { + if (!enable_track_cycles) { + return; + } + + is_aborted=true; + } + + ~track_cycles() { + if (!enable_track_cycles) { + return; + } + + if (is_aborted) { + return; + } + + uint64 end_time=__rdtsc(); + uint64 delta=end_time-start_time; + int delta_int=delta; + if (delta_int==delta) { + stats.add(delta_int); + } else { + stats.add(INT_MAX); + } + } +};*/ + +struct track_max_type { + map, pair> data; + + void add(int line, string name, int value, bool negative) { + auto& 
v=data[make_pair(line, name)]; + v.first=max(v.first, value); + v.second|=negative; + } + + void output(int basis_bits) { + print( "== track max ==" ); + for (auto c : data) { + print(c.first.second, double(c.second.first)/basis_bits, c.second.second); + } + } +}; +track_max_type track_max; +//#define TRACK_MAX(data) track_max.add(#data " {" __func__ ":" "__LINE__" ")", (data).num_bits()) +#define TRACK_MAX(data) track_max.add(__LINE__, #data, (data).num_bits(), (data)<0) + +//typedef __mpz_struct mpz_t[1]; +typedef __mpz_struct mpz_struct; + +int mpz_num_bits_upper_bound(mpz_struct* v) { + return mpz_size(v)*sizeof(mp_limb_t)*8; +} + +static bool allow_integer_constructor=false; //don't want static integers because they use the wrong allocator + +struct integer { + mpz_struct impl[1]; + + ~integer() { + mpz_clear(impl); + } + + integer() { + assert(allow_integer_constructor); + mpz_init(impl); + } + + integer(const integer& t) { + mpz_init(impl); + mpz_set(impl, t.impl); + } + + integer(integer&& t) { + mpz_init(impl); + mpz_swap(impl, t.impl); + } + + explicit integer(int64 i) { + mpz_init(impl); + mpz_set_si(impl, i); + } + + explicit integer(const string& s) { + mpz_init(impl); + int res=mpz_set_str(impl, s.c_str(), 0); + assert(res==0); + } + + //lsb first + explicit integer(const vector& data) { + mpz_init(impl); + mpz_import(impl, data.size(), -1, 8, 0, 0, &data[0]); + } + + //lsb first + vector to_vector() const { + vector res; + res.resize(mpz_sizeinbase(impl, 2)/64 + 1, 0); + + size_t count; + mpz_export(&res[0], &count, -1, 8, 0, 0, impl); + res.resize(count); + + return res; + } + + integer& operator=(const integer& t) { + mpz_set(impl, t.impl); + return *this; + } + + integer& operator=(integer&& t) { + mpz_swap(impl, t.impl); + return *this; + } + + integer& operator=(int64 i) { + mpz_set_si(impl, i); + return *this; + } + + integer& operator=(const string& s) { + int res=mpz_set_str(impl, s.c_str(), 0); + assert(res==0); + return *this; + } + + void 
set_bit(int index, bool value) { + if (value) { + mpz_setbit(impl, index); + } else { + mpz_clrbit(impl, index); + } + } + + bool get_bit(int index) { + return mpz_tstbit(impl, index); + } + + USED string to_string() const { + char* res_char=mpz_get_str(nullptr, 16, impl); + string res_string="0x"; + res_string+=res_char; + + if (res_string.substr(0, 3)=="0x-") { + res_string.at(0)='-'; + res_string.at(1)='0'; + res_string.at(2)='x'; + } + + free(res_char); + return res_string; + } + + string to_string_dec() const { + char* res_char=mpz_get_str(nullptr, 10, impl); + string res_string=res_char; + free(res_char); + return res_string; + } + + integer& operator+=(const integer& t) { + mpz_add(impl, impl, t.impl); + return *this; + } + + integer operator+(const integer& t) const { + integer res; + mpz_add(res.impl, impl, t.impl); + return res; + } + + integer& operator-=(const integer& t) { + mpz_sub(impl, impl, t.impl); + return *this; + } + + integer operator-(const integer& t) const { + integer res; + mpz_sub(res.impl, impl, t.impl); + return res; + } + + integer& operator*=(const integer& t) { + mpz_mul(impl, impl, t.impl); + return *this; + } + + integer operator*(const integer& t) const { + integer res; + mpz_mul(res.impl, impl, t.impl); + return res; + } + + integer& operator<<=(int i) { + assert(i>=0); + mpz_mul_2exp(impl, impl, i); + return *this; + } + + integer operator<<(int i) const { + assert(i>=0); + integer res; + mpz_mul_2exp(res.impl, impl, i); + return res; + } + + integer operator-() const { + integer res; + mpz_neg(res.impl, impl); + return res; + } + + integer& operator/=(const integer& t) { + mpz_fdiv_q(impl, impl, t.impl); + return *this; + } + + integer operator/(const integer& t) const { + integer res; + mpz_fdiv_q(res.impl, impl, t.impl); + return res; + } + + integer& operator>>=(int i) { + assert(i>=0); + mpz_fdiv_q_2exp(impl, impl, i); + return *this; + } + + integer operator>>(int i) const { + assert(i>=0); + integer res; + 
mpz_fdiv_q_2exp(res.impl, impl, i); + return res; + } + + //this is different from mpz_fdiv_r because it ignores the sign of t + integer& operator%=(const integer& t) { + mpz_mod(impl, impl, t.impl); + return *this; + } + + integer operator%(const integer& t) const { + integer res; + mpz_mod(res.impl, impl, t.impl); + return res; + } + + integer fdiv_r(const integer& t) const { + integer res; + mpz_fdiv_r(res.impl, impl, t.impl); + return res; + } + + bool prime() const { + return mpz_probab_prime_p(impl, 50)!=0; + } + + bool operator<(const integer& t) const { + return mpz_cmp(impl, t.impl)<0; + } + + bool operator<=(const integer& t) const { + return mpz_cmp(impl, t.impl)<=0; + } + + bool operator==(const integer& t) const { + return mpz_cmp(impl, t.impl)==0; + } + + bool operator>=(const integer& t) const { + return mpz_cmp(impl, t.impl)>=0; + } + + bool operator>(const integer& t) const { + return mpz_cmp(impl, t.impl)>0; + } + + bool operator!=(const integer& t) const { + return mpz_cmp(impl, t.impl)!=0; + } + + bool operator<(int i) const { + return mpz_cmp_si(impl, i)<0; + } + + bool operator<=(int i) const { + return mpz_cmp_si(impl, i)<=0; + } + + bool operator==(int i) const { + return mpz_cmp_si(impl, i)==0; + } + + bool operator>=(int i) const { + return mpz_cmp_si(impl, i)>=0; + } + + bool operator>(int i) const { + return mpz_cmp_si(impl, i)>0; + } + + bool operator!=(int i) const { + return mpz_cmp_si(impl, i)!=0; + } + + int num_bits() const { + return mpz_sizeinbase(impl, 2); + } +}; + +integer abs(const integer& t) { + integer res; + mpz_abs(res.impl, t.impl); + return res; +} + +integer root(const integer& t, int n) { + integer res; + mpz_root(res.impl, t.impl, n); + return res; +} + +struct gcd_res { + integer gcd; + integer s; + integer t; +}; + +//a*s + b*t = gcd ; gcd>=0 +// abs(s) < abs(b) / (2 gcd) +// abs(t) < abs(a) / (2 gcd) +//(except if |s|<=1 or |t|<=1) +gcd_res gcd(const integer& a, const integer& b) { + gcd_res res; + + 
mpz_gcdext(res.gcd.impl, res.s.impl, res.t.impl, a.impl, b.impl); + + return res; +} + +integer rand_integer(int num_bits, int seed=-1) { + thread_local gmp_randstate_t state; + thread_local bool is_init=false; + + if (!is_init) { + gmp_randinit_mt(state); + gmp_randseed_ui(state, 0); + is_init=true; + } + + if (seed!=-1) { + gmp_randseed_ui(state, seed); + } + + integer res; + assert(num_bits>=0); + mpz_urandomb(res.impl, state, num_bits); + return res; +} + +//a and b are nonnegative +void xgcd_partial(integer& u, integer& v, integer& a, integer& b, const integer& L) { + fmpz_t f_u; fmpz_init(f_u); + fmpz_t f_v; fmpz_init(f_v); + fmpz_t f_a; fmpz_init(f_a); + fmpz_t f_b; fmpz_init(f_b); + fmpz_t f_L; fmpz_init(f_L); + + fmpz_set_mpz(f_a, a.impl); + fmpz_set_mpz(f_b, b.impl); + fmpz_set_mpz(f_L, L.impl); + + fmpz_xgcd_partial(f_u, f_v, f_a, f_b, f_L); + + fmpz_get_mpz(u.impl, f_u); + fmpz_get_mpz(v.impl, f_v); + fmpz_get_mpz(a.impl, f_a); + fmpz_get_mpz(b.impl, f_b); + + fmpz_clear(f_u); + fmpz_clear(f_v); + fmpz_clear(f_a); + fmpz_clear(f_b); + fmpz_clear(f_L); +} + +USED string to_string(mpz_struct* t) { + integer t_int; + mpz_set(t_int.impl, t); + return t_int.to_string(); +} + +void inject_error(mpz_struct* i) { + if (!enable_random_error_injection) { + return; + } + + mark_vdf_test(); + + double v=rand_integer(32).to_vector()[0]/double(1ull<<32); + + if (va, g->a) > 0) + { + qfb_nucomp(r, g, f, D, L); + return; + } + + fmpz_init(a1); fmpz_init(a2); fmpz_init(c2); + fmpz_init(ca); fmpz_init(cb); fmpz_init(cc); + fmpz_init(k); fmpz_init(m); + fmpz_init(s); fmpz_init(sp); fmpz_init(ss); + fmpz_init(t); fmpz_init(u2); fmpz_init(v1); fmpz_init(v2); + + /* nucomp calculation */ + + fmpz_set(a1, f->a); + fmpz_set(a2, g->a); + fmpz_set(c2, g->c); + + fmpz_add(ss, f->b, g->b); + fmpz_fdiv_q_2exp(ss, ss, 1); + + fmpz_sub(m, f->b, g->b); + fmpz_fdiv_q_2exp(m, m, 1); + + fmpz_fdiv_r(t, a2, a1); + if (fmpz_is_zero(t)) + { + fmpz_set_ui(v1, 0); + fmpz_set(sp, a1); + } else 
+ fmpz_gcdinv(sp, v1, t, a1); + + fmpz_mul(k, m, v1); + fmpz_fdiv_r(k, k, a1); + + if (!fmpz_is_one(sp)) + { + fmpz_xgcd(s, v2, u2, ss, sp); + + fmpz_mul(k, k, u2); + fmpz_mul(t, v2, c2); + fmpz_sub(k, k, t); + + if (!fmpz_is_one(s)) + { + fmpz_fdiv_q(a1, a1, s); + fmpz_fdiv_q(a2, a2, s); + fmpz_mul(c2, c2, s); + } + + fmpz_fdiv_r(k, k, a1); + } + + if (fmpz_cmp(a1, L) < 0) + { + fmpz_mul(t, a2, k); + + fmpz_mul(ca, a2, a1); + + fmpz_mul_2exp(cb, t, 1); + fmpz_add(cb, cb, g->b); + + fmpz_add(cc, g->b, t); + fmpz_mul(cc, cc, k); + fmpz_add(cc, cc, c2); + + fmpz_fdiv_q(cc, cc, a1); + } else + { + fmpz_t m1, m2, r1, r2, co1, co2, temp; + + fmpz_init(m1); fmpz_init(m2); fmpz_init(r1); fmpz_init(r2); + fmpz_init(co1); fmpz_init(co2); fmpz_init(temp); + + fmpz_set(r2, a1); + fmpz_set(r1, k); + + fmpz_xgcd_partial(co2, co1, r2, r1, L); + + fmpz_mul(t, a2, r1); + fmpz_mul(m1, m, co1); + fmpz_add(m1, m1, t); + fmpz_tdiv_q(m1, m1, a1); + + fmpz_mul(m2, ss, r1); + fmpz_mul(temp, c2, co1); + fmpz_sub(m2, m2, temp); + fmpz_tdiv_q(m2, m2, a1); + + fmpz_mul(ca, r1, m1); + fmpz_mul(temp, co1, m2); + if (fmpz_sgn(co1) < 0) + fmpz_sub(ca, ca, temp); + else + fmpz_sub(ca, temp, ca); + + fmpz_mul(cb, ca, co2); + fmpz_sub(cb, t, cb); + fmpz_mul_2exp(cb, cb, 1); + fmpz_fdiv_q(cb, cb, co1); + fmpz_sub(cb, cb, g->b); + fmpz_mul_2exp(temp, ca, 1); + fmpz_fdiv_r(cb, cb, temp); + + fmpz_mul(cc, cb, cb); + fmpz_sub(cc, cc, D); + fmpz_fdiv_q(cc, cc, ca); + fmpz_fdiv_q_2exp(cc, cc, 2); + + if (fmpz_sgn(ca) < 0) + { + fmpz_neg(ca, ca); + fmpz_neg(cc, cc); + } + + fmpz_clear(m1); fmpz_clear(m2); fmpz_clear(r1); fmpz_clear(r2); + fmpz_clear(co1); fmpz_clear(co2); fmpz_clear(temp); + } + + fmpz_set(r->a, ca); + fmpz_set(r->b, cb); + fmpz_set(r->c, cc); + + fmpz_clear(ca); fmpz_clear(cb); fmpz_clear(cc); + fmpz_clear(k); fmpz_clear(m); + fmpz_clear(s); fmpz_clear(sp); fmpz_clear(ss); + fmpz_clear(t); fmpz_clear(u2); fmpz_clear(v1); fmpz_clear(v2); + fmpz_clear(a1); fmpz_clear(a2); fmpz_clear(c2); +} 
+ +// a = b * c +void nucomp_form(form &a, form &b, form &c, integer &D, integer &L) { + qfb fr, fr2, fr3; + fmpz_init(fr.a); + fmpz_init(fr.b); + fmpz_init(fr.c); + fmpz_init(fr2.a); + fmpz_init(fr2.b); + fmpz_init(fr2.c); + fmpz_init(fr3.a); + fmpz_init(fr3.b); + fmpz_init(fr3.c); + fmpz_set_mpz(fr2.a, b.a.impl); + fmpz_set_mpz(fr2.b, b.b.impl); + fmpz_set_mpz(fr2.c, b.c.impl); + fmpz_set_mpz(fr3.a, c.a.impl); + fmpz_set_mpz(fr3.b, c.b.impl); + fmpz_set_mpz(fr3.c, c.c.impl); + fmpz_t anticD, anticL; + fmpz_init(anticD); + fmpz_init(anticL); + fmpz_set_mpz(anticD, D.impl); + fmpz_set_mpz(anticL, L.impl); + qfb_nucomp(&fr,&fr2,&fr3,anticD,anticL); + fmpz_get_mpz(a.a.impl,fr.a); + fmpz_get_mpz(a.b.impl,fr.b); + fmpz_get_mpz(a.c.impl,fr.c); +} diff --git a/lib/chiavdf/fast_vdf/parameters.h b/lib/chiavdf/fast_vdf/parameters.h new file mode 100644 index 00000000..0e59aa11 --- /dev/null +++ b/lib/chiavdf/fast_vdf/parameters.h @@ -0,0 +1,207 @@ +//have to pass one of these in as a macro +//#define VDF_MODE 0 //used for the final submission and correctness testing +//#define VDF_MODE 1 //used for performance or other testing + +//also have to pass in one of these +//#define ENABLE_ALL_INSTRUCTIONS 1 +//#define ENABLE_ALL_INSTRUCTIONS 0 + +// +// + +//divide table +const int divide_table_index_bits=11; +const int gcd_num_quotient_bits=31; //excludes sign bit +const int data_size=31; +const int gcd_base_max_iter_divide_table=16; + +//continued fraction table +const int gcd_table_num_exponent_bits=3; +const int gcd_table_num_fraction_bits=7; +const int gcd_base_max_iter=5; + +#if ENABLE_ALL_INSTRUCTIONS==1 + const bool use_divide_table=true; + const int gcd_base_bits=63; + const int gcd_128_max_iter=2; +#else + const bool use_divide_table=false; + const int gcd_base_bits=50; + const int gcd_128_max_iter=3; +#endif + +/* +divide_table_index bits +10 - 0m1.269s +11 - 0m1.261s +12 - 0m1.262s +13 - 0m1.341s +**/ + +/* +gcd_base_max_iter_divide_table +13 - 0m1.290s +14 - 0m1.275s 
+15 - 0m1.265s +16 - 0m1.261s +17 - 0m1.268s +18 - 0m1.278s +19 - 0m1.283s +**/ + +/* +100k iterations; median of 3 runs. consistency between runs was very high + +effect of scheduler: +taskset 0,1 : 0m1.352s (63% speedup single thread, 37% over 0,2) +taskset 0,2 : 0m1.850s +default : 0m1.348s (fastest) +single threaded : 0m2.212s [this has gone down to 0m1.496s for some reason with the divide table] + +exponent fraction base_bits base_iter 128_iter seconds +3 7 50 5 3 0m1.350s [fastest with range checks enabled] +3 7 52 5 3 0m1.318s [range checks disabled; 2.4% faster] + +[this block with bmi and fma disabled] +3 7 46 5 3 0m1.426s +3 7 47 5 3 0m1.417s +3 7 48 5 3 0m1.421s +3 7 49 5 3 0m1.413s +3 7 50 5 3 0m1.401s [still fastest; bmi+fma is 3.8% faster] +3 7 51 5 3 0m1.406s +3 7 52 5 3 0m1.460s +3 7 50 6 3 0m1.416s + +3 7 49 6 3 0m1.376s + +2 8 45 6 3 0m1.590s +2 8 49 6 3 0m1.485s +2 8 51 6 3 0m1.479s +2 8 52 6 3 0m1.501s +2 8 53 6 3 0m1.531s +2 8 54 6 3 0m13.675s +2 8 55 6 3 0m13.648s + +3 7 49 2 3 0m14.571s +3 7 49 3 3 0m1.597s +3 7 49 4 3 0m1.430s +3 7 49 5 3 0m1.348s +3 7 49 6 3 0m1.376s +3 7 49 10 3 0m1.485s + +3 7 49 1 18 0m2.226s +3 7 49 2 10 0m1.756s +3 7 49 3 6 0m1.557s +3 7 49 4 4 0m1.388s +3 7 49 5 4 0m1.525s +3 7 49 6 3 0m1.377s +3 7 49 7 3 0m1.446s +3 7 49 8 2 0m1.503s + +3 6 45 4 3 0m15.176s +3 7 45 4 3 0m1.443s +3 8 45 4 3 0m1.386s +3 9 45 4 3 0m1.355s +3 10 45 4 3 0m1.353s +3 11 45 4 3 0m1.419s +3 12 45 4 3 0m1.451s +3 13 45 4 3 0m1.584s + +3 7 40 4 2 0m1.611s +3 8 40 4 2 0m1.570s +3 9 40 4 2 0m1.554s +3 10 40 4 2 0m1.594s +3 11 40 4 2 0m1.622s +3 12 40 4 2 0m1.674s +3 13 40 4 2 0m1.832s + +3 7 48 5 3 0m1.358s +3 7 49 5 3 0m1.353s +3 7 50 5 3 0m1.350s + +3 8 48 5 3 0m1.366s +3 8 49 5 3 0m1.349s +3 8 50 5 3 0m1.334s + +3 9 48 5 3 0m1.370s +3 9 49 5 3 0m1.349s +3 9 50 5 3 0m1.346s + +3 10 48 5 3 0m1.404s +3 10 49 5 3 0m1.382s +3 10 50 5 3 0m1.379s +***/ + +const uint64 max_spin_counter=10000000; + +//this value makes square_original not be called in 
100k iterations. with every iteration reduced, minimum value is 1 +const int num_extra_bits_ab=3; + +const bool calculate_k_repeated_mod=false; +const bool calculate_k_repeated_mod_interval=1; + +const int validate_interval=1; //power of 2. will check the discriminant in the slave thread at this interval. -1 to disable. no effect on performance +const int checkpoint_interval=10000; //at each checkpoint, the slave thread is restarted and the master thread calculates c +//checkpoint_interval=100000: 39388 +//checkpoint_interval=10000: 39249 cycles per fast iteration +//checkpoint_interval=1000: 38939 +//checkpoint_interval=100: 39988 +//no effect on performance (with track cycles enabled) + +// ==== test ==== +#if VDF_MODE==1 + #define VDF_TEST + const bool is_vdf_test=true; + + const bool enable_random_error_injection=false; + const double random_error_injection_rate=0; //0 to 1 + + //#define GENERATE_ASM_TRACKING_DATA + //#define ENABLE_TRACK_CYCLES + const bool vdf_test_correctness=false; + const bool enable_threads=true; +#endif + +// ==== production ==== +#if VDF_MODE==0 + const bool is_vdf_test=false; + + const bool enable_random_error_injection=false; + const double random_error_injection_rate=0; //0 to 1 + + const bool vdf_test_correctness=false; + const bool enable_threads=true; + + //#define ENABLE_TRACK_CYCLES +#endif + +// +// + +//this doesn't do anything outside of test code +//this doesn't work with the divide table currently +#define TEST_ASM + +const int gcd_size=20; //multiple of 4. 
must be at least half the discriminant size in bits divided by 64 + +const int gcd_max_iterations=gcd_size*2; //typically 1 iteration per limb + +const int max_bits_base=1024; //half the discriminant number of bits, rounded up +const int reduce_max_iterations=10000; + +const int num_asm_tracking_data=128; +bool enable_all_instructions=ENABLE_ALL_INSTRUCTIONS; + +//if the asm code doesn't use fma, the c code shouldn't either to be the same as the asm code +const bool enable_fma_in_c_code=ENABLE_ALL_INSTRUCTIONS; + +const int track_cycles_num_buckets=24; //each bucket is from 2^i to 2^(i+1) cycles +const int track_cycles_max_num=128; + +void mark_vdf_test() { + static bool did_warning=false; + if (!is_vdf_test && !did_warning) { + print( "test code enabled in production build" ); + did_warning=true; + } +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/picosha2.h b/lib/chiavdf/fast_vdf/picosha2.h new file mode 100644 index 00000000..ebd685e6 --- /dev/null +++ b/lib/chiavdf/fast_vdf/picosha2.h @@ -0,0 +1,377 @@ +/* +The MIT License (MIT) + +Copyright (C) 2017 okdshin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef PICOSHA2_H +#define PICOSHA2_H +// picosha2:20140213 + +#ifndef PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR +#define PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR \ + 1048576 //=1024*1024: default is 1MB memory +#endif + +#include +#include +#include +#include +#include +#include +namespace picosha2 { +typedef unsigned long word_t; +typedef unsigned char byte_t; + +static const size_t k_digest_size = 32; + +namespace detail { +inline byte_t mask_8bit(byte_t x) { return x & 0xff; } + +inline word_t mask_32bit(word_t x) { return x & 0xffffffff; } + +const word_t add_constant[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + +const word_t initial_message_digest[8] = {0x6a09e667, 0xbb67ae85, 0x3c6ef372, + 0xa54ff53a, 0x510e527f, 0x9b05688c, + 0x1f83d9ab, 0x5be0cd19}; + +inline word_t ch(word_t x, word_t y, word_t z) { return (x & y) ^ ((~x) & z); } + +inline word_t maj(word_t x, word_t y, word_t z) { + return (x & y) ^ (x & z) ^ (y & z); +} + +inline word_t rotr(word_t x, 
std::size_t n) { + assert(n < 32); + return mask_32bit((x >> n) | (x << (32 - n))); +} + +inline word_t bsig0(word_t x) { return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22); } + +inline word_t bsig1(word_t x) { return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25); } + +inline word_t shr(word_t x, std::size_t n) { + assert(n < 32); + return x >> n; +} + +inline word_t ssig0(word_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ shr(x, 3); } + +inline word_t ssig1(word_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ shr(x, 10); } + +template +void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 last) { + assert(first + 64 == last); + static_cast(last); // for avoiding unused-variable warning + word_t w[64]; + std::fill(w, w + 64, 0); + for (std::size_t i = 0; i < 16; ++i) { + w[i] = (static_cast(mask_8bit(*(first + i * 4))) << 24) | + (static_cast(mask_8bit(*(first + i * 4 + 1))) << 16) | + (static_cast(mask_8bit(*(first + i * 4 + 2))) << 8) | + (static_cast(mask_8bit(*(first + i * 4 + 3)))); + } + for (std::size_t i = 16; i < 64; ++i) { + w[i] = mask_32bit(ssig1(w[i - 2]) + w[i - 7] + ssig0(w[i - 15]) + + w[i - 16]); + } + + word_t a = *message_digest; + word_t b = *(message_digest + 1); + word_t c = *(message_digest + 2); + word_t d = *(message_digest + 3); + word_t e = *(message_digest + 4); + word_t f = *(message_digest + 5); + word_t g = *(message_digest + 6); + word_t h = *(message_digest + 7); + + for (std::size_t i = 0; i < 64; ++i) { + word_t temp1 = h + bsig1(e) + ch(e, f, g) + add_constant[i] + w[i]; + word_t temp2 = bsig0(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = mask_32bit(d + temp1); + d = c; + c = b; + b = a; + a = mask_32bit(temp1 + temp2); + } + *message_digest += a; + *(message_digest + 1) += b; + *(message_digest + 2) += c; + *(message_digest + 3) += d; + *(message_digest + 4) += e; + *(message_digest + 5) += f; + *(message_digest + 6) += g; + *(message_digest + 7) += h; + for (std::size_t i = 0; i < 8; ++i) { + *(message_digest + i) = 
mask_32bit(*(message_digest + i)); + } +} + +} // namespace detail + +template +void output_hex(InIter first, InIter last, std::ostream& os) { + os.setf(std::ios::hex, std::ios::basefield); + while (first != last) { + os.width(2); + os.fill('0'); + os << static_cast(*first); + ++first; + } + os.setf(std::ios::dec, std::ios::basefield); +} + +template +void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str) { + std::ostringstream oss; + output_hex(first, last, oss); + hex_str.assign(oss.str()); +} + +template +void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str) { + bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str); +} + +template +std::string bytes_to_hex_string(InIter first, InIter last) { + std::string hex_str; + bytes_to_hex_string(first, last, hex_str); + return hex_str; +} + +template +std::string bytes_to_hex_string(const InContainer& bytes) { + std::string hex_str; + bytes_to_hex_string(bytes, hex_str); + return hex_str; +} + +class hash256_one_by_one { + public: + hash256_one_by_one() { init(); } + + void init() { + buffer_.clear(); + std::fill(data_length_digits_, data_length_digits_ + 4, 0); + std::copy(detail::initial_message_digest, + detail::initial_message_digest + 8, h_); + } + + template + void process(RaIter first, RaIter last) { + add_to_data_length(static_cast(std::distance(first, last))); + std::copy(first, last, std::back_inserter(buffer_)); + std::size_t i = 0; + for (; i + 64 <= buffer_.size(); i += 64) { + detail::hash256_block(h_, buffer_.begin() + i, + buffer_.begin() + i + 64); + } + buffer_.erase(buffer_.begin(), buffer_.begin() + i); + } + + void finish() { + byte_t temp[64]; + std::fill(temp, temp + 64, 0); + std::size_t remains = buffer_.size(); + std::copy(buffer_.begin(), buffer_.end(), temp); + temp[remains] = 0x80; + + if (remains > 55) { + std::fill(temp + remains + 1, temp + 64, 0); + detail::hash256_block(h_, temp, temp + 64); + std::fill(temp, temp + 64 - 4, 0); + } else { + 
std::fill(temp + remains + 1, temp + 64 - 4, 0); + } + + write_data_bit_length(&(temp[56])); + detail::hash256_block(h_, temp, temp + 64); + } + + template + void get_hash_bytes(OutIter first, OutIter last) const { + for (const word_t* iter = h_; iter != h_ + 8; ++iter) { + for (std::size_t i = 0; i < 4 && first != last; ++i) { + *(first++) = detail::mask_8bit( + static_cast((*iter >> (24 - 8 * i)))); + } + } + } + + private: + void add_to_data_length(word_t n) { + word_t carry = 0; + data_length_digits_[0] += n; + for (std::size_t i = 0; i < 4; ++i) { + data_length_digits_[i] += carry; + if (data_length_digits_[i] >= 65536u) { + carry = data_length_digits_[i] >> 16; + data_length_digits_[i] &= 65535u; + } else { + break; + } + } + } + void write_data_bit_length(byte_t* begin) { + word_t data_bit_length_digits[4]; + std::copy(data_length_digits_, data_length_digits_ + 4, + data_bit_length_digits); + + // convert byte length to bit length (multiply 8 or shift 3 times left) + word_t carry = 0; + for (std::size_t i = 0; i < 4; ++i) { + word_t before_val = data_bit_length_digits[i]; + data_bit_length_digits[i] <<= 3; + data_bit_length_digits[i] |= carry; + data_bit_length_digits[i] &= 65535u; + carry = (before_val >> (16 - 3)) & 65535u; + } + + // write data_bit_length + for (int i = 3; i >= 0; --i) { + (*begin++) = static_cast(data_bit_length_digits[i] >> 8); + (*begin++) = static_cast(data_bit_length_digits[i]); + } + } + std::vector buffer_; + word_t data_length_digits_[4]; // as 64bit integer (16bit x 4 integer) + word_t h_[8]; +}; + +inline void get_hash_hex_string(const hash256_one_by_one& hasher, + std::string& hex_str) { + byte_t hash[k_digest_size]; + hasher.get_hash_bytes(hash, hash + k_digest_size); + return bytes_to_hex_string(hash, hash + k_digest_size, hex_str); +} + +inline std::string get_hash_hex_string(const hash256_one_by_one& hasher) { + std::string hex_str; + get_hash_hex_string(hasher, hex_str); + return hex_str; +} + +namespace impl { +template 
+void hash256_impl(RaIter first, RaIter last, OutIter first2, OutIter last2, int, + std::random_access_iterator_tag) { + hash256_one_by_one hasher; + // hasher.init(); + hasher.process(first, last); + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} + +template +void hash256_impl(InputIter first, InputIter last, OutIter first2, + OutIter last2, int buffer_size, std::input_iterator_tag) { + std::vector buffer(buffer_size); + hash256_one_by_one hasher; + // hasher.init(); + while (first != last) { + int size = buffer_size; + for (int i = 0; i != buffer_size; ++i, ++first) { + if (first == last) { + size = i; + break; + } + buffer[i] = *first; + } + hasher.process(buffer.begin(), buffer.begin() + size); + } + hasher.finish(); + hasher.get_hash_bytes(first2, last2); +} +} + +template +void hash256(InIter first, InIter last, OutIter first2, OutIter last2, + int buffer_size = PICOSHA2_BUFFER_SIZE_FOR_INPUT_ITERATOR) { + picosha2::impl::hash256_impl( + first, last, first2, last2, buffer_size, + typename std::iterator_traits::iterator_category()); +} + +template +void hash256(InIter first, InIter last, OutContainer& dst) { + hash256(first, last, dst.begin(), dst.end()); +} + +template +void hash256(const InContainer& src, OutIter first, OutIter last) { + hash256(src.begin(), src.end(), first, last); +} + +template +void hash256(const InContainer& src, OutContainer& dst) { + hash256(src.begin(), src.end(), dst.begin(), dst.end()); +} + +template +void hash256_hex_string(InIter first, InIter last, std::string& hex_str) { + byte_t hashed[k_digest_size]; + hash256(first, last, hashed, hashed + k_digest_size); + std::ostringstream oss; + output_hex(hashed, hashed + k_digest_size, oss); + hex_str.assign(oss.str()); +} + +template +std::string hash256_hex_string(InIter first, InIter last) { + std::string hex_str; + hash256_hex_string(first, last, hex_str); + return hex_str; +} + +inline void hash256_hex_string(const std::string& src, std::string& hex_str) { + 
hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +void hash256_hex_string(const InContainer& src, std::string& hex_str) { + hash256_hex_string(src.begin(), src.end(), hex_str); +} + +template +std::string hash256_hex_string(const InContainer& src) { + return hash256_hex_string(src.begin(), src.end()); +} +templatevoid hash256(std::ifstream& f, OutIter first, OutIter last){ + hash256(std::istreambuf_iterator(f), std::istreambuf_iterator(), first,last); + +} +}// namespace picosha2 +#endif // PICOSHA2_H \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/run.sh b/lib/chiavdf/fast_vdf/run.sh new file mode 100755 index 00000000..7a870d71 --- /dev/null +++ b/lib/chiavdf/fast_vdf/run.sh @@ -0,0 +1,2 @@ +#!/bin/bash +./vdf $1 $2 diff --git a/lib/chiavdf/fast_vdf/sconstruct b/lib/chiavdf/fast_vdf/sconstruct new file mode 100755 index 00000000..a0fb50b9 --- /dev/null +++ b/lib/chiavdf/fast_vdf/sconstruct @@ -0,0 +1,24 @@ +import gch + +ccflags=' -O0' +#ccflags=' -O3' + +ccflags = '-D VDF_MODE=1 -D ENABLE_ALL_INSTRUCTIONS=0 -no-pie -march=native' + ccflags + +env.Append( + CCFLAGS=ccflags, + LINKFLAGS= '-no-pie', + LIBS=['gmpxx', 'gmp', 'flint', 'pthread'] +); + +gch.generate(env); + +env['precompiled_header']=File('include.h'); +env['Gch']=env.Gch(target='include.h.gch', source=env['precompiled_header']); + +#env.Program('gcd_test.cpp'); +#env.Program('vdf.cpp'); + +env.Program( 'compile_asm', 'compile_asm.cpp', CCFLAGS = ccflags + ' -O0' ); +env.Command( 'asm_compiled.s', 'compile_asm', "./compile_asm" ); +env.Program( 'vdf', [ 'vdf.cpp', 'asm_compiled.s' ] ); \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/threading.h b/lib/chiavdf/fast_vdf/threading.h new file mode 100644 index 00000000..e71f752f --- /dev/null +++ b/lib/chiavdf/fast_vdf/threading.h @@ -0,0 +1,898 @@ +#include + +//mp_limb_t is an unsigned integer +static_assert(sizeof(mp_limb_t)==8, ""); + +static_assert(sizeof(unsigned long int)==8, ""); 
+static_assert(sizeof(long int)==8, ""); + +#ifdef ENABLE_TRACK_CYCLES + const int track_cycles_array_size=track_cycles_max_num*track_cycles_num_buckets; + + thread_local int track_cycles_next_slot=0; + thread_local array track_cycles_cycle_counters; + thread_local array track_cycles_call_counters; + thread_local array track_cycles_names; + + void track_cycles_init() { + thread_local bool is_init=false; + if (!is_init) { + //print( &track_cycles_names ); + + //track_cycles_cycle_counters=new uint64[]; + //track_cycles_call_counters=new uint64[track_cycles_max_num*track_cycles_num_buckets]; + //track_cycles_names=new const char*[track_cycles_max_num]; + + for (int x=0;x=0 && slot=track_cycles_num_buckets) { + return; + } + + assert(num_bits>=0 && num_bits=0 && slot=0 && index>6)<<6; + + void* res=boost::alignment::aligned_alloc(64, aligned_bytes); // aligned_alloc(64, aligned_bytes); + assert((uint64(res)&63)==0); //must be aligned for correctness + return res; +} + +void* mp_alloc_func(size_t new_bytes) { + void* res=alloc_cache_line(new_bytes); + assert((uint64(res)&63)==0); //all memory used by gmp must be cache line aligned + return res; +} + +void mp_free_func(void* old_ptr, size_t old_bytes) { + //either mp_alloc_func allocated old_ptr and it is 64-aligned, or it points to data in mpz and its address equals 16 modulo 64 + assert((uint64(old_ptr)&63)==0 || (uint64(old_ptr)&63)==16); + + if ((uint64(old_ptr)&63)==0) { + //mp_alloc_func allocated this, so it can be freed with std::free + boost::alignment::aligned_free(old_ptr); //free(old_ptr); + } else { + //this is part of the mpz struct defined below. 
it can't be freed, so do nothing + } +} + +void* mp_realloc_func(void* old_ptr, size_t old_bytes, size_t new_bytes) { + void* res=mp_alloc_func(new_bytes); + + memcpy(res, old_ptr, (old_bytes struct alignas(64) mpz : public mpz_base { + static const int expected_size=d_expected_size; + static const int padded_size=d_padded_size; + + static_assert(expected_size>=1 && expected_size<=padded_size, ""); + + uint64 data[padded_size]; //must not be cache line aligned + + bool was_reallocated() const { + return c_mpz._mp_d!=data; + } + + //can't call any mpz functions here because it is global + mpz() { + c_mpz._mp_size=0; + c_mpz._mp_d=(mp_limb_t *)data; + c_mpz._mp_alloc=padded_size; + + //this is supposed to be cache line aligned so that the next assert works + assert((uint64(this)&63)==0); + + //mp_free_func uses this to decide whether to free or not + assert((uint64(c_mpz._mp_d)&63)==16); + } + + ~mpz() { + if (is_vdf_test) { + //don't want this to happen for performance reasons + assert(!was_reallocated()); + } + + //if c_mpz.data wasn't reallocated, it has to point to this instance's data and not some other instance's data + //if mpz_swap was used, this might be violated + assert((uint64(c_mpz._mp_d)&63)==0 || c_mpz._mp_d==data); + mpz_clear(&c_mpz); + } + + mpz(const mpz& t)=delete; + mpz(mpz&& t)=delete; + + mpz& operator=(const mpz_struct* t) { + mpz_set(*this, t); + return *this; + } + + mpz& operator=(const mpz& t) { + mpz_set(*this, t); + return *this; + } + + mpz& operator=(mpz&& t) { + mpz_set(*this, t); //do not use mpz_swap + return *this; + } + + /*mpz& operator=(const mpz_base& t) { + mpz_set(*this, t); + return *this; + } + + mpz& operator=(mpz_base&& t) { + mpz_set(*this, t); //do not use mpz_swap + return *this; + }*/ + + mpz& operator=(uint64 i) { + mpz_set_ui(*this, i); + return *this; + } + + mpz& operator=(int64 i) { + mpz_set_si(*this, i); + return *this; + } + + mpz& operator=(const string& s) { + int res=mpz_set_str(*this, s.c_str(), 0); + 
assert(res==0); + return *this; + } + + USED string to_string() const { + char* res_char=mpz_get_str(nullptr, 16, *this); + string res_string = "0x"; + res_string+=res_char; + + if (res_string.substr(0, 3) == "0x-") { + res_string.at(0)='-'; + res_string.at(1)='0'; + res_string.at(2)='x'; + } + + free(res_char); + return res_string; + } + + USED string to_string_dec() const { + char* res_char=mpz_get_str(nullptr, 10, *this); + string res_string=res_char; + free(res_char); + return res_string; + } + + //sets *this to a+b + void set_add(const mpz_struct* a, const mpz_struct* b) { + mpz_add(*this, a, b); + } + + void set_add(const mpz_struct* a, uint64 b) { + mpz_add_ui(*this, a, b); + } + + mpz& operator+=(const mpz_struct* t) { + set_add(*this, t); + return *this; + } + + mpz& operator+=(uint64 t) { + set_add(*this, t); + return *this; + } + + void set_sub(const mpz_struct* a, const mpz_struct* b) { + mpz_sub(*this, a, b); + } + + void set_sub(const mpz_struct* a, uint64 b) { + mpz_sub_ui(*this, a, b); + } + + template void set_sub(uint64 a, const mpz_b& b) { + mpz_ui_sub(*this, a, b); + } + + mpz& operator-=(const mpz_struct* t) { + set_sub(*this, t); + return *this; + } + + void set_mul(const mpz_struct* a, const mpz_struct* b) { + mpz_mul(*this, a, b); + } + + void set_mul(const mpz_struct* a, int64 b) { + mpz_mul_si(*this, a, b); + } + + void set_mul(const mpz_struct* a, uint64 b) { + mpz_mul_ui(*this, a, b); + } + + mpz& operator*=(const mpz_struct* t) { + set_mul(*this, t); + return *this; + } + + mpz& operator*=(int64 t) { + set_mul(*this, t); + return *this; + } + + mpz& operator*=(uint64 t) { + set_mul(*this, t); + return *this; + } + + void set_left_shift(const mpz_struct* a, int i) { + assert(i>=0); + mpz_mul_2exp(*this, a, i); + } + + mpz& operator<<=(int i) { + set_left_shift(*this, i); + return *this; + } + + //*this+=a*b + void set_add_mul(const mpz_struct* a, const mpz_struct* b) { + mpz_addmul(*this, a, b); + } + + void set_add_mul(const mpz_struct* 
a, uint64 b) {
+        mpz_addmul_ui(*this, a, b);
+    }
+
+    //*this-=a*b
+    void set_sub_mul(const mpz_struct* a, const mpz_struct* b) {
+        mpz_submul(*this, a, b);
+    }
+
+    void set_sub_mul(const mpz_struct* a, uint64 b) {
+        mpz_submul_ui(*this, a, b);
+    }
+
+    void negate() {
+        mpz_neg(*this, *this);
+    }
+
+    void abs() {
+        mpz_abs(*this, *this);
+    }
+
+    void set_divide_floor(const mpz_struct* a, const mpz_struct* b) {
+        if (mpz_sgn(b)==0) {
+            assert(false);
+            return;
+        }
+
+        mpz_fdiv_q(*this, a, b);
+    }
+
+    void set_divide_floor(const mpz_struct* a, const mpz_struct* b, mpz_struct* remainder) {
+        if (mpz_sgn(b)==0) {
+            assert(false);
+            return;
+        }
+
+        mpz_fdiv_qr(*this, remainder, a, b);
+    }
+
+    void set_divide_exact(const mpz_struct* a, const mpz_struct* b) {
+        if (mpz_sgn(b)==0) {
+            assert(false);
+            return;
+        }
+
+        mpz_divexact(*this, a, b);
+    }
+
+    void set_mod(const mpz_struct* a, const mpz_struct* b) {
+        if (mpz_sgn(b)==0) {
+            assert(false);
+            return;
+        }
+
+        mpz_mod(*this, a, b);
+    }
+
+    mpz& operator%=(const mpz_struct* t) {
+        set_mod(*this, t);
+        return *this;
+    }
+
+    bool divisible_by(const mpz_struct* a) const {
+        if (mpz_sgn(a)==0) {
+            assert(false);
+            return false;
+        }
+
+        return mpz_divisible_p(*this, a);
+    }
+
+    //sets *this to a>>i, with the quotient truncated (mpz_tdiv_q_2exp). i must be >=0.
+    //the source operand is a, matching set_left_shift. it used to be ignored, with
+    //*this shifted in place, which was only correct when a aliased *this
+    void set_right_shift(const mpz_struct* a, int i) {
+        assert(i>=0);
+        mpz_tdiv_q_2exp(*this, a, i);
+    }
+
+    //note: this uses truncation rounding
+    mpz& operator>>=(int i) {
+        set_right_shift(*this, i);
+        return *this;
+    }
+
+    bool operator<(const mpz_struct* t) const { return mpz_cmp(*this, t)<0; }
+    bool operator<=(const mpz_struct* t) const { return mpz_cmp(*this, t)<=0; }
+    bool operator==(const mpz_struct* t) const { return mpz_cmp(*this, t)==0; }
+    bool operator>=(const mpz_struct* t) const { return mpz_cmp(*this, t)>=0; }
+    bool operator>(const mpz_struct* t) const { return mpz_cmp(*this, t)>0; }
+    bool operator!=(const mpz_struct* t) const { return mpz_cmp(*this, t)!=0; }
+
+    bool operator<(int64 i) const { return mpz_cmp_si(*this, i)<0; }
+
bool operator<=(int64 i) const { return mpz_cmp_si(*this, i)<=0; } + bool operator==(int64 i) const { return mpz_cmp_si(*this, i)==0; } + bool operator>=(int64 i) const { return mpz_cmp_si(*this, i)>=0; } + bool operator>(int64 i) const { return mpz_cmp_si(*this, i)>0; } + bool operator!=(int64 i) const { return mpz_cmp_si(*this, i)!=0; } + + bool operator<(uint64 i) const { return mpz_cmp_ui(_(), i)<0; } + bool operator<=(uint64 i) const { return mpz_cmp_ui(_(), i)<=0; } + bool operator==(uint64 i) const { return mpz_cmp_ui(_(), i)==0; } + bool operator>=(uint64 i) const { return mpz_cmp_ui(_(), i)>=0; } + bool operator>(uint64 i) const { return mpz_cmp_ui(_(), i)>0; } + bool operator!=(uint64 i) const { return mpz_cmp_ui(_(), i)!=0; } + + int compare_abs(const mpz_struct* t) const { + return mpz_cmpabs(*this, t); + } + + int compare_abs(uint64 t) const { + return mpz_cmpabs_ui(*this, t); + } + + //returns 0 if *this==0 + int sgn() const { + return mpz_sgn(_()); + } + + int num_bits() const { + return mpz_sizeinbase(*this, 2); + } + + //0 if this is 0 + int num_limbs() const { + return mpz_size(*this); + } + + const uint64* read_limbs() const { + return (uint64*)mpz_limbs_read(*this); + } + + //limbs are uninitialized. call finish + uint64* write_limbs(int num) { + return (uint64*)mpz_limbs_write(*this, num); + } + + //limbs are zero padded to the specified size. 
call finish
+    uint64* modify_limbs(int num) {
+        int old_size=num_limbs();
+
+        uint64* res=(uint64*)mpz_limbs_modify(*this, num);
+
+        //gmp doesn't do this
+        for (int x=old_size;x array to_array() const {
+        assert(size>=num_limbs());
+
+        array res;
+        for (int x=0;x struct cache_line_ptr {
+    //owning pointer: the destructor runs ~type() on the pointee and releases it with aligned_free
+    type* ptr=nullptr;
+
+    cache_line_ptr() {}
+    cache_line_ptr(cache_line_ptr& t)=delete;
+    cache_line_ptr(cache_line_ptr&& t) { swap(ptr, t.ptr); }
+
+    cache_line_ptr& operator=(cache_line_ptr& t)=delete;
+    //move assignment swaps the pointers, so t ends up owning (and later freeing) the old pointee.
+    //the return statement was missing; flowing off the end of a value-returning function is undefined behaviour
+    cache_line_ptr& operator=(cache_line_ptr&& t) { swap(ptr, t.ptr); return *this; }
+
+    ~cache_line_ptr() {
+        if (ptr) {
+            ptr->~type();
+            boost::alignment::aligned_free(ptr); // wjb free(ptr);
+            ptr=nullptr;
+        }
+    }
+
+    type& operator*() const { return *ptr; }
+    type* operator->() const { return ptr; }
+};
+
+template cache_line_ptr make_cache_line(arg_types&&... args) {
+    cache_line_ptr res;
+    res.ptr=(type*)alloc_cache_line(sizeof(type));
+    new(res.ptr) type(forward(args)...);
+    return res;
+}
+
+template void prefetch(const type& p) {
+    //write prefetching lowers performance but read prefetching increases it
+    if (is_write) return;
+
+    for (int x=0;x void prefetch_write(const type& p) { prefetch(p); }
+template void prefetch_read(const type& p) { prefetch(p); }
+
+void memory_barrier() {
+    asm volatile( "" ::: "memory" );
+}
+
+struct alignas(64) thread_counter {
+    uint64 counter_value=0; //updated atomically since only one thread can write to it
+    uint64 error_flag=0;
+
+    void reset() {
+        memory_barrier();
+        counter_value=0;
+        error_flag=0;
+        memory_barrier();
+    }
+
+    thread_counter() {
+        assert((uint64(this)&63)==0);
+    }
+};
+
+thread_counter master_counter[100];
+thread_counter slave_counter[100];
+
+struct thread_state {
+    int pairindex;
+    bool is_slave=false;
+    uint64 counter_start=0;
+    uint64 last_fence=0;
+
+    void reset() {
+        is_slave=false;
+        counter_start=0;
+        last_fence=0;
+    }
+
+    thread_counter& this_counter() {
+        return (is_slave)?
slave_counter[pairindex] : master_counter[pairindex]; + } + + thread_counter& other_counter() { + return (is_slave)? master_counter[pairindex] : slave_counter[pairindex]; + } + + void raise_error() { + //if (is_vdf_test) { + //print( "raise_error", is_slave ); + //} + + memory_barrier(); + this_counter().error_flag=1; + other_counter().error_flag=1; + memory_barrier(); + } + + const uint64 v() { + return this_counter().counter_value; + } + + //waits for the other thread to have at least this counter value + //returns false if an error has been raised + bool fence_absolute(uint64 t_v) { + if (last_fence>=t_v) { + return true; + } + + memory_barrier(); + + uint64 spin_counter=0; + while (other_counter().counter_value < t_v) { + if (this_counter().error_flag || other_counter().error_flag) { + raise_error(); + break; + } + + if (spin_counter>max_spin_counter) { + if (is_vdf_test) { + print( "spin_counter too high", is_slave ); + } + + raise_error(); + break; + } + + ++spin_counter; + memory_barrier(); + } + + memory_barrier(); + + if (!(this_counter().error_flag)) { + last_fence=t_v; + } + + return !(this_counter().error_flag); + } + + bool fence(int delta) { + return fence_absolute(counter_start+uint64(delta)); + } + + //increases this thread's counter value. 
it can only be increased + //returns false if an error has been raised + bool advance_absolute(uint64 t_v) { + if (t_v==v()) { + return true; + } + + memory_barrier(); //wait for all writes to finish (on x86 this doesn't do anything but the compiler still needs it) + + assert(t_v>=v()); + + if (this_counter().error_flag) { + raise_error(); + } + + this_counter().counter_value=t_v; + + memory_barrier(); //want the counter writes to be low latency so prevent the compiler from caching it + return !(this_counter().error_flag); + } + + bool advance(int delta) { + return advance_absolute(counter_start+uint64(delta)); + } + + bool has_error() { + return this_counter().error_flag; + } + + /*void wait_for_error_to_be_cleared() { + assert(is_slave && enable_threads); + while (this_counter().error_flag) { + memory_barrier(); + } + } + + void clear_error() { + assert(!is_slave); + + memory_barrier(); + this_counter().error_flag=0; + other_counter().error_flag=0; + memory_barrier(); + }*/ +}; + +thread_local thread_state c_thread_state; + +struct alignas(64) gcd_uv_entry { + //these are uninitialized for the first entry + uint64 u_0; + uint64 u_1; + uint64 v_0; + uint64 v_1; + uint64 parity; //1 if odd, 0 if even + + uint64 exit_flag; //1 if last, else 0 + + uint64 unused_0; + uint64 unused_1; + + template void matrix_multiply(const mpz_type& in_a, const mpz_type& in_b, mpz_type& out_a, mpz_type& out_b) const { + out_a.set_mul((parity==0)? in_a : in_b, (parity==0)? u_0 : v_0); + out_a.set_sub_mul((parity==0)? in_b : in_a, (parity==0)? v_0 : u_0); + + out_b.set_mul((parity==0)? in_b : in_a, (parity==0)? v_1 : u_1); + out_b.set_sub_mul((parity==0)? in_a : in_b, (parity==0)? 
u_1 : v_1); + } +}; +static_assert(sizeof(gcd_uv_entry)==64, ""); + +template struct alignas(64) gcd_results_type { + mpz_type as[2]; + mpz_type bs[2]; + + static const int num_counter=gcd_max_iterations+1; //one per outputted entry + + array uv_entries; + + int end_index=0; + + mpz_type& get_a_start() { + return as[0]; + } + + mpz_type& get_b_start() { + return bs[0]; + } + + mpz_type& get_a_end() { + assert(end_index>=0 && end_index<2); + return as[end_index]; + } + + mpz_type& get_b_end() { + assert(end_index>=0 && end_index<2); + return bs[end_index]; + } + + //this will increase the counter value and wait until the result at index is available + //index 0 only has exit_flag initialized + bool get_entry(int counter_start_delta, int index, const gcd_uv_entry** res) const { + *res=nullptr; + + if (index>=gcd_max_iterations+1) { + c_thread_state.raise_error(); + return false; + } + + assert(index>=0); + + if (!c_thread_state.fence(counter_start_delta + index+1)) { + return false; + } + + *res=&uv_entries[index]; + return true; + } +}; + +//a and b in c_results should be initialized +//returns false if the gcd failed +//this assumes that all inputs are unsigned, a>=b, and a>=threshold +//this will increase the counter value as results are generated +template bool gcd_unsigned( + int counter_start_delta, gcd_results_type& c_results, const array& threshold +) { + if (c_thread_state.has_error()) { + return false; + } + + int a_limbs=c_results.get_a_start().num_limbs(); + int b_limbs=c_results.get_b_start().num_limbs(); + + if (a_limbs>gcd_size || b_limbs>gcd_size) { + c_thread_state.raise_error(); + return false; + } + + asm_code::asm_func_gcd_unsigned_data data; + data.a=c_results.as[0].modify_limbs(gcd_size); + data.b=c_results.bs[0].modify_limbs(gcd_size); + data.a_2=c_results.as[1].write_limbs(gcd_size); + data.b_2=c_results.bs[1].write_limbs(gcd_size); + data.threshold=(uint64*)&threshold[0]; + + 
data.uv_counter_start=c_thread_state.counter_start+counter_start_delta+1; + data.out_uv_counter_addr=&(c_thread_state.this_counter().counter_value); + data.out_uv_addr=(uint64*)&(c_results.uv_entries[1]); + data.iter=-1; + data.a_end_index=(a_limbs==0)? 0 : a_limbs-1; + + if (is_vdf_test) { + assert((uint64(data.out_uv_addr)&63)==0); //should be cache line aligned + } + + memory_barrier(); + int error_code=asm_code::asm_func_gcd_unsigned(&data); + memory_barrier(); + + if (error_code!=0) { + c_thread_state.raise_error(); + return false; + } + + assert(data.iter>=0 && data.iter<=gcd_max_iterations); //total number of iterations performed + bool is_even=((data.iter-1)&1)==0; //parity of last iteration (can be -1) + + c_results.end_index=(is_even)? 1 : 0; + + c_results.as[0].finish(gcd_size); + c_results.as[1].finish(gcd_size); + c_results.bs[0].finish(gcd_size); + c_results.bs[1].finish(gcd_size); + + inject_error(c_results.as[0]); + inject_error(c_results.as[1]); + inject_error(c_results.bs[0]); + inject_error(c_results.bs[1]); + + if (!c_thread_state.advance(counter_start_delta+gcd_results_type::num_counter)) { + return false; + } + + return true; +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/upload.sh b/lib/chiavdf/fast_vdf/upload.sh new file mode 100755 index 00000000..0b538e45 --- /dev/null +++ b/lib/chiavdf/fast_vdf/upload.sh @@ -0,0 +1,2 @@ +#!/bin/bash +scp *.c *.cpp *.h *.sh sconstruct $VM:projects/chia_vdf/ diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp new file mode 100644 index 00000000..70cc8eea --- /dev/null +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -0,0 +1,721 @@ +#include "include.h" + +#include "parameters.h" + +#include "bit_manipulation.h" +#include "double_utility.h" +#include "integer.h" + +#include "asm_main.h" + +#include "vdf_original.h" + +#include "vdf_new.h" +#include "picosha2.h" + +#include "gpu_integer.h" +#include "gpu_integer_divide.h" + +#include "gcd_base_continued_fractions.h" +//#include 
"gcd_base_divide_table.h" +#include "gcd_128.h" +#include "gcd_unsigned.h" + +#include "gpu_integer_gcd.h" + +#include "asm_types.h" + +#include "threading.h" +#include "nucomp.h" +#include "vdf_fast.h" + +#include "vdf_test.h" +#include +#include + +#include +#include + +#include + +#include "ClassGroup.h" +#include "Reducer.h" + +bool warn_on_corruption_in_production=false; + +struct akashnil_form { + // y = ax^2 + bxy + y^2 + mpz_t a; + mpz_t b; + mpz_t c; + // mpz_t d; // discriminant +}; + +const int64_t THRESH = 1UL<<31; +const int64_t EXP_THRESH = 31; + + +//always works +void repeated_square_original(form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { + vdf_original::form f_in,*f_res; + f_in.a[0]=f.a.impl[0]; + f_in.b[0]=f.b.impl[0]; + f_in.c[0]=f.c.impl[0]; + f_res=&f_in; + + for (uint64_t i=0; i < iterations; i++) { + f_res = vdf_original::square(*f_res); + + if(nuduplListener!=NULL) + nuduplListener->OnIteration(NL_FORM,f_res,base+i); + } + + mpz_set(f.a.impl, f_res->a); + mpz_set(f.b.impl, f_res->b); + mpz_set(f.c.impl, f_res->c); + + //vdf_original::form f_res=vdf_original::repeated_square(&f_in, base, iterations); +} + + +class WesolowskiCallback :public INUDUPLListener { +public: + uint64_t kl; + + struct form *forms; + form result; + + bool deferred; + int64_t switch_iters = -1; + int64_t switch_index; + int64_t iterations; + + integer D; + integer L; + + ClassGroupContext *t; + Reducer *reducer; + + WesolowskiCallback(uint64_t expected_space) { + forms = (form*) malloc(sizeof(struct form) * expected_space); + + t=new ClassGroupContext(4096); + reducer=new Reducer(*t); + } + + ~WesolowskiCallback() { + free(forms); + + delete(reducer); + delete(t); + } + + void reduce(form& inf) { +#if 0 + // Old reduce from Sundersoft form + inf.reduce(); +#else + // Pulmark reduce based on Akashnil reduce + mpz_set(t->a, inf.a.impl); + mpz_set(t->b, inf.b.impl); + mpz_set(t->c, inf.c.impl); + + 
reducer->run(); + + mpz_set(inf.a.impl, t->a); + mpz_set(inf.b.impl, t->b); + mpz_set(inf.c.impl, t->c); +#endif + } + + void IncreaseConstants(int num_iters) { + kl = 100; + switch_iters = num_iters; + switch_index = num_iters / 10; + } + + int GetPosition(int power) { + if (switch_iters == -1 || power < switch_iters) { + return power / 10; + } else { + return (switch_index + (power - switch_iters) / 100); + } + } + + form *GetForm(int power) { + return &(forms[GetPosition(power)]); + } + + form GetFormFromCheckpoint(int power) { + uint64 checkpoint = power - power % 100; + form checkpoint_form; + mpz_init(checkpoint_form.a.impl); + mpz_init(checkpoint_form.b.impl); + mpz_init(checkpoint_form.c.impl); + checkpoint_form = forms[GetPosition(checkpoint)]; + repeated_square_original(checkpoint_form, D, L, 0, power % 100, NULL); + return checkpoint_form; + } + + void OnIteration(int type, void *data, uint64 iteration) + { + iteration++; + + //cout << iteration << " " << maxiterations << endl; + if(iteration%kl==0) + { + form *mulf=GetForm(iteration); + // Initialize since it is raw memory + mpz_inits(mulf->a.impl,mulf->b.impl,mulf->c.impl,NULL); + + switch(type) + { + case NL_SQUARESTATE: + { + //cout << "NL_SQUARESTATE" << endl; + uint64 res; + + square_state_type *square_state=(square_state_type *)data; + + if(!square_state->assign(mulf->a, mulf->b, mulf->c, res)) + cout << "square_state->assign failed" << endl; + break; + } + case NL_FORM: + { + //cout << "NL_FORM" << endl; + + vdf_original::form *f=(vdf_original::form *)data; + + mpz_set(mulf->a.impl, f->a); + mpz_set(mulf->b.impl, f->b); + mpz_set(mulf->c.impl, f->c); + break; + } + default: + cout << "Unknown case" << endl; + } + reduce(*mulf); + + iterations=iteration; // safe to access now + } + } +}; + +void ApproximateParameters(uint64_t T, uint64_t& L, uint64_t& k, uint64_t& w) { + double log_memory = 23.25349666; + double log_T = log2(T); + L = 1; + if (log_T - log_memory > 0.000001) { + L = ceil(pow(2, 
log_memory - 20)); + } + double intermediate = T * (double)0.6931471 / (2.0 * L); + k = std::max(std::round(log(intermediate) - log(log(intermediate)) + 0.25), 1.0); + //w = floor((double) T / ((double) T/k + L * (1 << (k+1)))) - 2; + w = 2; +} + +// thread safe; but it is only called from the main thread +void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallback &weso, bool& stopped) { + #ifdef VDF_TEST + uint64 num_calls_fast=0; + uint64 num_iterations_fast=0; + uint64 num_iterations_slow=0; + #endif + + uint64_t num_iterations = 0; + + while (!stopped) { + uint64 c_checkpoint_interval=checkpoint_interval; + + #ifdef VDF_TEST + form f_copy; + form f_copy_3; + bool f_copy_3_valid=false; + if (vdf_test_correctness) { + f_copy=f; + c_checkpoint_interval=1; + + f_copy_3=f; + f_copy_3_valid=square_fast_impl(f_copy_3, D, L, num_iterations); + } + #endif + + uint64 batch_size=c_checkpoint_interval; + + #ifdef ENABLE_TRACK_CYCLES + print( "track cycles enabled; results will be wrong" ); + repeated_square_original(f, D, L, 100); //randomize the a and b values + #endif + + // This works single threaded + square_state_type square_state; + square_state.pairindex=0; + + uint64 actual_iterations=repeated_square_fast(square_state, f, D, L, num_iterations, batch_size, &weso); + + #ifdef VDF_TEST + ++num_calls_fast; + if (actual_iterations!=~uint64(0)) num_iterations_fast+=actual_iterations; + #endif + + #ifdef ENABLE_TRACK_CYCLES + print( "track cycles actual iterations", actual_iterations ); + return; //exit the program + #endif + + if (actual_iterations==~uint64(0)) { + //corruption; f is unchanged. do the entire batch with the slow algorithm + repeated_square_original(f, D, L, num_iterations, batch_size, &weso); + actual_iterations=batch_size; + + #ifdef VDF_TEST + num_iterations_slow+=batch_size; + #endif + + if (warn_on_corruption_in_production) { + print( "!!!! corruption detected and corrected !!!!" 
); + } + } + + if (actual_iterations ConvertIntegerToBytes(integer x, uint64_t num_bytes) { + std::vector bytes; + bool negative = false; + if (x < 0) { + x = abs(x); + x = x - integer(1); + negative = true; + } + for (int iter = 0; iter < num_bytes; iter++) { + auto byte = (x % integer(256)).to_vector(); + if (negative) + byte[0] ^= 255; + bytes.push_back(byte[0]); + x = x / integer(256); + } + std::reverse(bytes.begin(), bytes.end()); + return bytes; +} + +integer HashPrime(std::vector s) { + std::string prime = "prime"; + uint32_t j = 0; + while (true) { + std::vector input(prime.begin(), prime.end()); + std::vector j_to_bytes = ConvertIntegerToBytes(integer(j), 8); + input.insert(input.end(), j_to_bytes.begin(), j_to_bytes.end()); + input.insert(input.end(), s.begin(), s.end()); + std::vector hash(picosha2::k_digest_size); + picosha2::hash256(input.begin(), input.end(), hash.begin(), hash.end()); + + integer prime_integer; + for (int i = 0; i < 16; i++) { + prime_integer *= integer(256); + prime_integer += integer(hash[i]); + } + if (prime_integer.prime()) { + return prime_integer; + } + j++; + } +} + +std::vector SerializeForm(WesolowskiCallback &weso, form &y, int int_size) { + //weso.reduce(y); + y.reduce(); + std::vector res = ConvertIntegerToBytes(y.a, int_size); + std::vector b_res = ConvertIntegerToBytes(y.b, int_size); + res.insert(res.end(), b_res.begin(), b_res.end()); + return res; +} + +integer GetB(WesolowskiCallback &weso, integer& D, form &x, form& y) { + int int_size = (D.num_bits() + 16) >> 4; + std::vector serialization = SerializeForm(weso, x, int_size); + std::vector serialization_y = SerializeForm(weso, y, int_size); + serialization.insert(serialization.end(), serialization_y.begin(), serialization_y.end()); + return HashPrime(serialization); +} + +integer FastPow(uint64_t a, uint64_t b, integer& c) { + if (b == 0) + return integer(1); + + integer res = FastPow(a, b / 2, c); + res = res * res; + res = res % c; + if (b % 2) { + res = res * 
integer(a); + res = res % c; + } + return res; +} + +form FastPowForm(form &x, const integer& D, uint64_t num_iterations) { + if (num_iterations == 0) + return form::identity(D); + + form res = FastPowForm(x, D, num_iterations / 2); + res = res * res; + if (num_iterations % 2) + res = res * x; + return res; +} + +uint64_t GetBlock(uint64_t i, uint64_t k, uint64_t T, integer& B) { + integer res(1 << k); + res *= FastPow(2, T - k * (i + 1), B); + res = res / B; + auto res_vector = res.to_vector(); + return res_vector[0]; +} + +std::string BytesToStr(const std::vector &in) +{ + std::vector::const_iterator from = in.cbegin(); + std::vector::const_iterator to = in.cend(); + std::ostringstream oss; + for (; from != to; ++from) + oss << std::hex << std::setw(2) << std::setfill('0') << static_cast(*from); + return oss.str(); +} + +struct Proof { + Proof() { + + } + + Proof(std::vector y, std::vector proof) { + this->y = y; + this->proof = proof; + } + + string hex() { + std::vector bytes(y); + bytes.insert(bytes.end(), proof.begin(), proof.end()); + return BytesToStr(bytes); + } + + std::vector y; + std::vector proof; +}; + +#define PULMARK 1 + +form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, uint64_t num_iterations, uint64_t k, uint64_t l, WesolowskiCallback& weso) { + auto t1 = std::chrono::high_resolution_clock::now(); + +#if PULMARK + ClassGroupContext *t; + Reducer *reducer; + + t=new ClassGroupContext(4096); + reducer=new Reducer(*t); +#endif + + integer B = GetB(weso, D, x_init, y); + integer L=root(-D, 4); + + uint64_t k1 = k / 2; + uint64_t k0 = k - k1; + + form x = form::identity(D); + + for (int64_t j = l - 1; j >= 0; j--) { + x=FastPowForm(x, D, (1 << k)); + + std::vector
ys((1 << k)); + for (uint64_t i = 0; i < (1 << k); i++) + ys[i] = form::identity(D); + + form *tmp; + for (uint64_t i = 0; i < ceil(1.0 * num_iterations / (k * l)); i++) { + if (num_iterations >= k * (i * l + j + 1)) { + uint64_t b = GetBlock(i*l + j, k, num_iterations, B); + tmp = weso.GetForm(done_iterations + i * k * l); + nucomp_form(ys[b], ys[b], *tmp, D, L); +#if PULMARK + // Pulmark reduce based on Akashnil reduce + mpz_set(t->a, ys[b].a.impl); + mpz_set(t->b, ys[b].b.impl); + mpz_set(t->c, ys[b].c.impl); + + reducer->run(); + + mpz_set(ys[b].a.impl, t->a); + mpz_set(ys[b].b.impl, t->b); + mpz_set(ys[b].c.impl, t->c); +#else + ys[b].reduce(); +#endif + } + } + + for (uint64_t b1 = 0; b1 < (1 << k1); b1++) { + form z = form::identity(D); + for (uint64_t b0 = 0; b0 < (1 << k0); b0++) { + nucomp_form(z, z, ys[b1 * (1 << k0) + b0], D, L); +#if PULMARK + // Pulmark reduce based on Akashnil reduce + mpz_set(t->a, z.a.impl); + mpz_set(t->b, z.b.impl); + mpz_set(t->c, z.c.impl); + + reducer->run(); + + mpz_set(z.a.impl, t->a); + mpz_set(z.b.impl, t->b); + mpz_set(z.c.impl, t->c); +#else + z.reduce(); +#endif + } + z = FastPowForm(z, D, b1 * (1 << k0)); + x = x * z; + } + + for (uint64_t b0 = 0; b0 < (1 << k0); b0++) { + form z = form::identity(D); + for (uint64_t b1 = 0; b1 < (1 << k1); b1++) { + nucomp_form(z, z, ys[b1 * (1 << k0) + b0], D, L); +#if PULMARK + // Pulmark reduce based on Akashnil reduce + mpz_set(t->a, z.a.impl); + mpz_set(t->b, z.b.impl); + mpz_set(t->c, z.c.impl); + + reducer->run(); + + mpz_set(z.a.impl, t->a); + mpz_set(z.b.impl, t->b); + mpz_set(z.c.impl, t->c); +#else + z.reduce(); +#endif + } + z = FastPowForm(z, D, b0); + x = x * z; + } + } + +#if PULMARK + // Pulmark reduce based on Akashnil reduce + mpz_set(t->a, x.a.impl); + mpz_set(t->b, x.b.impl); + mpz_set(t->c, x.c.impl); + + reducer->run(); + + mpz_set(x.a.impl, t->a); + mpz_set(x.b.impl, t->b); + mpz_set(x.c.impl, t->c); + + delete(reducer); + delete(t); +#else + x.reduce(); +#endif 
+ + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(t2 - t1).count(); + return x; +} + +void GenerateProofThreaded(std::promise && form_promise, form y, form x_init, integer D, uint64_t done_iterations, uint64_t num_iterations, uint64_t +k, uint64_t l, WesolowskiCallback& weso) { + form proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso); + form_promise.set_value(proof); +} + +Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, uint64_t done_iterations, WesolowskiCallback& weso) { + uint64_t l, k, w; + form x_init = x; + integer L=root(-D, 4); + + k = 10; + w = 2; + l = (num_iterations >= 10000000) ? 10 : 1; + + while (weso.iterations < done_iterations + num_iterations) { + std::this_thread::sleep_for (std::chrono::seconds(3)); + } + + form y = weso.GetFormFromCheckpoint(done_iterations + num_iterations); + auto proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso); + + int int_size = (D.num_bits() + 16) >> 4; + + std::vector y_bytes = SerializeForm(weso, y, 129); + std::vector proof_bytes = SerializeForm(weso, proof, int_size); + Proof final_proof=Proof(y_bytes, proof_bytes); + + return final_proof; +} + +Proof CreateProofOfTimeNWesolowski(integer& D, form x, int64_t num_iterations, + uint64_t done_iterations, WesolowskiCallback& weso, int depth_limit, int depth) { + uint64_t l, k, w; + int64_t iterations1, iterations2; + integer L=root(-D, 4); + form x_init = x; + + k = 10; + w = 2; + l = (num_iterations >= 10000000) ? 10 : 1; + iterations1 = num_iterations * w / (w + 1); + + // NOTE(Florin): This is still suboptimal, + // some work can still be lost if weso iterations is in between iterations1 and num_iterations. 
+ if (weso.iterations >= done_iterations + num_iterations) { + iterations1 = (done_iterations + num_iterations) / 3; + } + + iterations1 = iterations1 - iterations1 % 100; + iterations2 = num_iterations - iterations1; + + while (weso.iterations < done_iterations + iterations1) { + std::this_thread::sleep_for (std::chrono::seconds(3)); + } + + + form y1 = *weso.GetForm(done_iterations + iterations1); + + std::promise form_promise; + auto form_future = form_promise.get_future(); + + std::thread t(&GenerateProofThreaded, std::move(form_promise), y1, x_init, D, done_iterations, iterations1, k, l, std::ref(weso)); + + Proof proof2; + if (depth < depth_limit - 1) { + proof2 = CreateProofOfTimeNWesolowski(D, y1, iterations2, done_iterations + iterations1, weso, depth_limit, depth + 1); + } else { + proof2 = CreateProofOfTimeWesolowski(D, y1, iterations2, done_iterations + iterations1, weso); + } + + t.join(); + form proof = form_future.get(); + + int int_size = (D.num_bits() + 16) >> 4; + Proof final_proof; + final_proof.y = proof2.y; + std::vector proof_bytes(proof2.proof); + std::vector tmp = ConvertIntegerToBytes(integer(iterations1), 8); + proof_bytes.insert(proof_bytes.end(), tmp.begin(), tmp.end()); + tmp.clear(); + tmp = SerializeForm(weso, y1, int_size); + proof_bytes.insert(proof_bytes.end(), tmp.begin(), tmp.end()); + tmp.clear(); + tmp = SerializeForm(weso, proof, int_size); + proof_bytes.insert(proof_bytes.end(), tmp.begin(), tmp.end()); + final_proof.proof = proof_bytes; + return final_proof; +} + +std::mutex main_mutex; + +void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallback& weso) { + Proof result = CreateProofOfTimeNWesolowski(D, x, num_iterations, 0, weso, 2, 0); + std::lock_guard lock(main_mutex); + std::cout << BytesToStr(ConvertIntegerToBytes(integer(num_iterations), 8)); + std::cout << result.hex() << "\n" << std::flush; +} + +int main(int argc, char* argv[]) { + if (getenv( "warn_on_corruption_in_production" )!=nullptr) 
{ + warn_on_corruption_in_production=true; + } + if (is_vdf_test) { + print( "=== Test mode ===" ); + } + if (warn_on_corruption_in_production) { + print( "=== Warn on corruption enabled ===" ); + } + assert(is_vdf_test); //assertions should be disabled in VDF_MODE==0 + init_gmp(); + allow_integer_constructor=true; //make sure the old gmp allocator isn't used + set_rounding_mode(); + vdf_original::init(); + + integer D(argv[1]); + integer L=root(-D, 4); + form f=form::generator(D); + + bool stop_signal = false; + uint64_t num_iterations; + std::set seen_iterations; + + std::vector threads; + WesolowskiCallback weso(100000000); + + mpz_init(weso.forms[0].a.impl); + mpz_init(weso.forms[0].b.impl); + mpz_init(weso.forms[0].c.impl); + + weso.forms[0]=f; + weso.D = D; + weso.L = L; + weso.kl = 10; + + std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stop_signal)); + + while(!stop_signal) { + std::this_thread::sleep_for (std::chrono::seconds(2)); + + cin >> num_iterations; + if (seen_iterations.size() > 0 && num_iterations >= *seen_iterations.begin()) + continue; + + if (num_iterations == 0) { + for (int t = 0; t < threads.size(); t++) { + threads[t].join(); + } + stop_signal = true; + vdf_worker.join(); + std::lock_guard lock(main_mutex); + for (int i = 0; i < 100; i++) + std::cout << "0"; + std::cout << "\n" << std::flush; + } else { + if (seen_iterations.find(num_iterations) == seen_iterations.end()) { + seen_iterations.insert(num_iterations); + threads.push_back(std::thread(NWesolowskiMain, D, f, num_iterations, std::ref(weso))); + } + } + } +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/vdf_fast.h b/lib/chiavdf/fast_vdf/vdf_fast.h new file mode 100644 index 00000000..080952b6 --- /dev/null +++ b/lib/chiavdf/fast_vdf/vdf_fast.h @@ -0,0 +1,1144 @@ +typedef mpz< 9, 14> mpz_9 ; //2 cache lines +typedef mpz<17, 22> mpz_17; //3 cache lines +typedef mpz<25, 30> mpz_25; //4 cache lines +typedef mpz<33, 38> mpz_33; //5 cache lines + 
+static_assert(sizeof(mpz_9 )==2*64); +static_assert(sizeof(mpz_17)==3*64); +static_assert(sizeof(mpz_25)==4*64); +static_assert(sizeof(mpz_33)==5*64); + +//these all have at least 64 extra bits before they reallocate +//x is the discriminant number of bits divided by 4 +typedef mpz_9 int1x; +typedef mpz_17 int2x; +typedef mpz_25 int3x; +typedef mpz_33 int4x; + +typedef gcd_results_type gcd_results_int2x; + +//this is accessed by both threads +//all divisions are exact +struct square_state_type { + int pairindex; + + //running the gcd will advance the counter value by this much on both the master and slave threads + //it is then advanced by 1 after the gcd results are consumed + static const int gcd_num_counter=gcd_results_int2x::num_counter; + + //these are constants so they don't need to be prefetched + struct phase_constant_type { + int4x D; // D>=0 + int1x L; // L>=0 + alignas(64) array gcd_zero; + alignas(64) array gcd_L; + } phase_constant; + + //the master assigns the new values of A and B + struct phase_start_type { + //int2x wjba; + //int2x wjbb; + + int2x as[2]; // a>=0 + int2x bs[2]; // b>=0 + alignas(64) int ab_index=0; //index of the start a/b values. 
the new values will be written to the other slot in this array + alignas(8) bool b_higher_magnitude_than_a=false; //also true if b==a + alignas(8) uint64 num_valid_iterations=0; + alignas(8) bool corruption_flag=false; //the slave thread will set this if c is invalid + + int2x& a() { return as[ab_index]; } + int2x& b() { return bs[ab_index]; } + int2x& A() { return as[1-ab_index]; } + int2x& B() { return bs[1-ab_index]; } + } phase_start; + + static const int counter_start_phase_0=0; + static const int counter_start_phase_1=counter_start_phase_0+gcd_num_counter+1; + static const int counter_start_phase_2=counter_start_phase_1+gcd_num_counter+1; + static const int counter_start_phase_3=counter_start_phase_2+1; + static const int counter_start_phase_4=counter_start_phase_3+1; + static const int counter_start_phase_5=counter_start_phase_4+1; + + // + // + + struct phase_0_master_d_type { + gcd_results_int2x gcd_1_0; // gcd(b,a,0); a'=1; b'=0 ; U0*b + V0*a = 1 ; U1*b + V1*a = 0 + } phase_0_master_d; + + struct phase_0_slave_d_type { + int4x b_b; // b_b_D=b^2 + int2x a_4; // a_4=4a=a<<2 + int4x b_b_D; // b_b_D=b^2-D + int4x c; // c=(b^2-D)/(4a) + int4x c_remainder; //only assigned if c is being validated + + //initially: + //U0=-c ; U1=0 + //if |b|<|a|: swap(U0, U1) ; *=-1 + //if b<0: *=-1 + // + //to apply a matrix Z*[X0 -Y0 ; -X1 Y1] where Z=local parity (-1 or 1) ; X=local u (unsigned) ; Y=local v (unsigned): + //-do the matrix multiplication by the column vector + //-reduce each component of the result modulo a. 
this can be delayed until more matrix multiplications have happened if necessary + int4x U0s[2]; + int4x U1s[2]; + alignas(64) int k_index=0; // k=(-U0*c)%a ; k>=0 ; kL ; b'=t<=L ; u0*a + v0*k = s ; u1*a + v1*k = t + // the final values of s and t fit in an int1x + + int2x& s() { return gcd_s_t.get_a_end(); } + int2x& t() { return gcd_s_t.get_b_end(); } + } phase_1_slave_d; + + + // + // + + struct phase_2_master_d_type { + int3x c_v1; // c_v1 = c*v1 + int3x b_t; // b_t = b*t + int3x b_t_c_v1; // b_t_c_v1 = b*t+c*v1 + int2x h; // h = S*(b*t+c*v1)/a + int2x v1_h; // v1_h=v1*h + } phase_2_master_d; + + struct phase_2_slave_d_type { + int2x t_t_S; // t_t_S = t*t*S + int1x v0_2; // v0_2 = 2*v0 = v0<<1 + } phase_2_slave_d; + + // + // + + struct phase_3_master_d_type { + // A = t*t*S + v1*h ; A=as[1-ab_index] + int2x A_2; // A_2 = A*2 = A<<1 + } phase_3_master_d; + + struct phase_3_slave_d_type { + int2x S_t_v0; // S_t_v0 = S*t*v0 + int2x a_S_t_v0; // a_S_t_v0 = a + S*t*v0 + int3x t_2_a_S_t_v0; // t_2_a_S_t_v0 = 2t*(a + S*t*v0) + int1x t_2; // t_2 = 2t = t<<1 + int2x t_2_a_S_t_v0_v1; // t_2_a_S_t_v0_v1 = (2t*(a + S*t*v0))/v1 + int2x e; // e = -b - (2t*(a + S*t*v0))/v1 + } phase_3_slave_d; + + // + // + + struct phase_4_master_d_type { + int2x v0_2_h; // v0_2_h = 2*v0*h + int2x f; // f = e - 2*v0*h + // B = f % (2A) + // A = |A| + // assign b_higher_magnitude_than_a + } phase_4_master_d; + struct phase_4_slave_d_type { + } phase_4_slave_d; + + + // + // ========================================================================================================================== + // + + bool phase_0_master() { + { + TRACK_CYCLES //100 + if (!c_thread_state.fence(counter_start_phase_0)) { + TRACK_CYCLES_ABORT + return false; + } + } + + { + //overhead of track_cycles + TRACK_CYCLES //60 + } + + prefetch_write(phase_0_master_d); + prefetch_write(phase_1_master_d); + prefetch_write(phase_2_master_d); + prefetch_write(phase_3_master_d); + prefetch_write(phase_4_master_d); + 
prefetch_write(phase_start.A()); + prefetch_write(phase_start.B()); + + const auto& gcd_zero=phase_constant.gcd_zero; + const auto& L=phase_constant.L; + + const auto& a=phase_start.a(); prefetch_read(a); + const auto& b=phase_start.b(); prefetch_read(b); + + const int max_bits_ab=max_bits_base + num_extra_bits_ab; + + //sometimes the nudupl code won't reduce the output all the way. if it has too many bits it will get reduced by calling + // square_original + bool ab_valid; + { + TRACK_CYCLES //185 + ab_valid=(a.num_bits()<=max_bits_ab && b.num_bits()<=max_bits_ab && a.sgn()>=0); + } + if (!ab_valid) { + return false; + } + + //a>L if this is true (both are nonnegative) + //usually a has twice as many limbs as L + bool a_high_enough; + { + TRACK_CYCLES //102 + a_high_enough=(a.num_limbs()>L.num_limbs()); + } + if (!a_high_enough) { + return false; + } + + auto& gcd_1_0=phase_0_master_d.gcd_1_0; + + { + TRACK_CYCLES //345 + gcd_1_0.get_a_start()=(phase_start.b_higher_magnitude_than_a)? b : a; + gcd_1_0.get_b_start()=(phase_start.b_higher_magnitude_than_a)? 
a : b; + } + + { + TRACK_CYCLES //16070 (critical path 1) + if (!gcd_unsigned(counter_start_phase_0, gcd_1_0, gcd_zero)) { + TRACK_CYCLES_ABORT + return false; + } + } + + return true; + } + + bool phase_0_slave() { + { + TRACK_CYCLES //1698 (doesn't matter) + if (!c_thread_state.fence(counter_start_phase_0)) { + TRACK_CYCLES_ABORT + return false; + } + } + + prefetch_write(phase_0_slave_d); + prefetch_write(phase_1_slave_d); + prefetch_write(phase_2_slave_d); + prefetch_write(phase_3_slave_d); + prefetch_write(phase_4_slave_d); + + const auto& D=phase_constant.D; + + const auto& a=phase_start.a(); prefetch_read(a); + const auto& b=phase_start.b(); prefetch_read(b); + + const auto& gcd_1_0=phase_0_master_d.gcd_1_0; + + auto& b_b =phase_0_slave_d.b_b; + auto& a_4 =phase_0_slave_d.a_4; + auto& b_b_D =phase_0_slave_d.b_b_D; + auto& c =phase_0_slave_d.c; + auto& c_remainder =phase_0_slave_d.c_remainder; + auto& U0s =phase_0_slave_d.U0s; + auto& U1s =phase_0_slave_d.U1s; + auto& k_index =phase_0_slave_d.k_index; + + { + thread_local int validate_iter=0; + ++validate_iter; + bool validate_c=(validate_iter&(validate_interval-1))==0 && validate_interval!=-1; + + { + TRACK_CYCLES //606 + b_b.set_mul(b, b); + } + { + TRACK_CYCLES //193 + a_4.set_left_shift(a, 2); + } + { + TRACK_CYCLES //385 + b_b_D.set_sub(b_b, D); + } + + if (!validate_c) { + TRACK_CYCLES //747 + c.set_divide_exact(b_b_D, a_4); + } else { + TRACK_CYCLES //1309; latency is hidden by gcd being slow + c.set_divide_floor(b_b_D, a_4, c_remainder); + if (c_remainder.sgn()!=0) { + assert(!is_vdf_test); //should never have corruption unless there are bugs + phase_start.corruption_flag=true; //bad + return false; + } + } + + { + TRACK_CYCLES //100 + if (a.sgn()<0 || c.sgn()<0) { + assert(!is_vdf_test); + phase_start.corruption_flag=true; + return false; + } + } + } + + // + // + + int k_index_local=0; + + //calculating gcd(b,a).u, so bu+av=g + //if b is negative, then u is negated: (-b)(-u)+av=g + //if a and b are 
swapped, will calculate v but the negation is unchanged + + { + //if |b|<|a|: swap(U0, U1) + auto& c_U0=(phase_start.b_higher_magnitude_than_a? U0s[0] : U1s[0]); + auto& c_U1=(phase_start.b_higher_magnitude_than_a? U1s[0] : U0s[0]); + + if (calculate_k_repeated_mod) { + TRACK_CYCLES //176 + assert(calculate_k_repeated_mod_interval>=1); + + //U0=-c ; U1=0 + c_U0=c; + c_U0.negate(); + c_U1=uint64(0ull); + } else { + TRACK_CYCLES + //U0=1 ; U1=0 + c_U0=uint64(1ull); + c_U1=uint64(0ull); + } + } + + //if |b|<|a|: *=-1 + /*if (!phase_start.b_higher_magnitude_than_a) { + TRACK_CYCLES + U0s[0].negate(); + U1s[0].negate(); + }*/ + + { + TRACK_CYCLES //206 + //if b<0: *=-1 + if (b.sgn()<0) { + U0s[0].negate(); + U1s[0].negate(); + } + } + + bool mod_pending=true; //have to calculate -c%a even if no work is done + int num_multiplications=0; + + int gcd_index=0; + while (true) { + const gcd_uv_entry* c_entry=nullptr; + { + TRACK_CYCLES //357 + if (!gcd_1_0.get_entry(counter_start_phase_0, gcd_index, &c_entry)) { + TRACK_CYCLES_ABORT + return false; + } + } + + if (gcd_index!=0) { + auto& in_U0=U0s[k_index_local]; + auto& in_U1=U1s[k_index_local]; + + auto& out_U0=U0s[1-k_index_local]; + auto& out_U1=U1s[1-k_index_local]; + + { + TRACK_CYCLES //325 + c_entry->matrix_multiply(in_U0, in_U1, out_U0, out_U1); + ++num_multiplications; + mod_pending=true; + } + + if (calculate_k_repeated_mod && num_multiplications==calculate_k_repeated_mod_interval) { + TRACK_CYCLES //650 with calculate_k_repeated_mod_interval==1 + out_U0%=a; + out_U1%=a; + mod_pending=false; + num_multiplications=0; + } + + k_index_local=1-k_index_local; + } + + ++gcd_index; + + if (c_entry->exit_flag) { + break; + } + } + + if (calculate_k_repeated_mod && mod_pending) { + TRACK_CYCLES //2612 with calculate_k_repeated_mod_interval infinite; + U0s[k_index_local]%=a; + U1s[k_index_local]%=a; + mod_pending=false; + } + + if (!calculate_k_repeated_mod) { + TRACK_CYCLES //1825 (critical path 2) + // k=(-U0*c)%a + auto& 
in_U0=U0s[k_index_local]; + + in_U0.set_mul(in_U0, c); + in_U0.negate(); + in_U0.set_mod(in_U0, a); + } + + inject_error(U0s[k_index_local]); + + k_index=k_index_local; + + return true; + } + + // + // ========================================================================================================================== + // + + bool phase_1_master() { + { + TRACK_CYCLES //3335 (this stall doesn't matter since this thread is slower than the slave thread in this phase) + if (!c_thread_state.fence(counter_start_phase_1)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& c=phase_0_slave_d.c; prefetch_read(c); + + auto& v0s=phase_1_master_d.v0s; + auto& v1s=phase_1_master_d.v1s; + auto& v_index=phase_1_master_d.v_index; + + const auto& gcd_s_t=phase_1_slave_d.gcd_s_t; + + int v_index_local=0; + + v0s[0]=uint64(0ull); + v1s[0]=uint64(1ull); + + int gcd_index=0; + while (true) { + const gcd_uv_entry* c_entry=nullptr; + { + TRACK_CYCLES //396 + if (!gcd_s_t.get_entry(counter_start_phase_1, gcd_index, &c_entry)) { + TRACK_CYCLES_ABORT + return false; + } + } + + if (gcd_index!=0) { + TRACK_CYCLES //206 + + int1x& in_v0=v0s[v_index_local]; + int1x& in_v1=v1s[v_index_local]; + + int1x& out_v0=v0s[1-v_index_local]; + int1x& out_v1=v1s[1-v_index_local]; + + c_entry->matrix_multiply(in_v0, in_v1, out_v0, out_v1); + + v_index_local=1-v_index_local; + } + + ++gcd_index; + + if (c_entry->exit_flag) { + break; + } + } + + inject_error(v0s[v_index_local]); + inject_error(v1s[v_index_local]); + + v_index=v_index_local; + + return true; + } + + bool phase_1_slave() { + { + TRACK_CYCLES //78 + if (!c_thread_state.fence(counter_start_phase_0)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& gcd_L=phase_constant.gcd_L; + + const auto& a=phase_start.a(); + + const auto& k=phase_0_slave_d.k(); + + auto& gcd_s_t=phase_1_slave_d.gcd_s_t; + + { + TRACK_CYCLES //323 + gcd_s_t.get_a_start()=a; + gcd_s_t.get_b_start()=k; + } + + { + TRACK_CYCLES //8551 (critical 
path 3) + if (!gcd_unsigned(counter_start_phase_1, gcd_s_t, gcd_L)) { + TRACK_CYCLES_ABORT + return false; + } + } + + return true; + } + + // + // ========================================================================================================================== + // + + bool phase_2_master() { + { + TRACK_CYCLES //76 + if (!c_thread_state.fence(counter_start_phase_1)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& a=phase_start.a(); + const auto& b=phase_start.b(); + const auto& c=phase_0_slave_d.c; + + const auto& v1=phase_1_master_d.v1(); + + bool S_negative=phase_1_master_d.S_negative(); + + auto& c_v1 =phase_2_master_d.c_v1; + auto& b_t =phase_2_master_d.b_t; + auto& b_t_c_v1 =phase_2_master_d.b_t_c_v1; + auto& h =phase_2_master_d.h; + auto& v1_h =phase_2_master_d.v1_h; + + { + TRACK_CYCLES //453 + c_v1.set_mul(c, v1); + } + + { + TRACK_CYCLES //97 + if (!c_thread_state.fence(counter_start_phase_2)) { + TRACK_CYCLES_ABORT + return false; + } + } + const auto& t=phase_1_slave_d.t(); prefetch_read(t); + + { + TRACK_CYCLES //426 + b_t.set_mul(b, t); + } + + { + TRACK_CYCLES //212 + b_t_c_v1.set_add(b_t, c_v1); + } + + { + TRACK_CYCLES //439 + h.set_divide_exact(b_t_c_v1, a); + } + + { + TRACK_CYCLES //98 + if (S_negative) { + h.negate(); + } + } + + { + TRACK_CYCLES //324 + v1_h.set_mul(v1, h); + } + + return true; + } + + bool phase_2_slave() { + { + TRACK_CYCLES //97 + if (!c_thread_state.fence(counter_start_phase_1)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& t=phase_1_slave_d.t(); + + auto& t_t_S =phase_2_slave_d.t_t_S; + auto& v0_2 =phase_2_slave_d.v0_2; + + { + TRACK_CYCLES //198 + t_t_S.set_mul(t, t); + } + + { + TRACK_CYCLES //812 + if (!c_thread_state.fence(counter_start_phase_2)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& v0=phase_1_master_d.v0(); prefetch_read(v0); + const auto& v1=phase_1_master_d.v1(); prefetch_read(v1); + + bool S_negative; + { + TRACK_CYCLES //189 + 
S_negative=phase_1_master_d.S_negative(); + } + + { + TRACK_CYCLES //91 + if (S_negative) { + t_t_S.negate(); + } + } + + { + TRACK_CYCLES //102 + v0_2.set_left_shift(v0, 1); + } + + return true; + } + + // + // ========================================================================================================================== + // + + //phase 3, master side: A=t_t_S+v1_h and A_2=A<<1; returns false if the fence fails + bool phase_3_master() { + { + TRACK_CYCLES //116 + if (!c_thread_state.fence(counter_start_phase_3)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& v1_h =phase_2_master_d.v1_h; + + const auto& t_t_S =phase_2_slave_d.t_t_S; prefetch_read(t_t_S); + const auto& v0_2 =phase_2_slave_d.v0_2; prefetch_read(v0_2); + + auto& A=phase_start.A(); + auto& A_2=phase_3_master_d.A_2; + + { + TRACK_CYCLES //223 + A.set_add(t_t_S, v1_h); + } + + { + TRACK_CYCLES //180 + A_2.set_left_shift(A, 1); + } + + return true; + } + //phase 3, slave side: S_t_v0=t*v0 (negated if S_negative), a_S_t_v0=a+S_t_v0, t_2=t<<1, t_2_a_S_t_v0=t_2*a_S_t_v0, + //t_2_a_S_t_v0_v1=t_2_a_S_t_v0/v1 (set_divide_exact), e=-(b+t_2_a_S_t_v0_v1); returns false if the fence fails + bool phase_3_slave() { + { + TRACK_CYCLES //78 + if (!c_thread_state.fence(counter_start_phase_2)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& a=phase_start.a(); + const auto& b=phase_start.b(); + + const auto& t=phase_1_slave_d.t(); + const auto& v0=phase_1_master_d.v0(); + const auto& v1=phase_1_master_d.v1(); + bool S_negative=phase_1_master_d.S_negative(); + + auto& S_t_v0 =phase_3_slave_d.S_t_v0; + auto& a_S_t_v0 =phase_3_slave_d.a_S_t_v0; + auto& t_2_a_S_t_v0 =phase_3_slave_d.t_2_a_S_t_v0; + auto& t_2 =phase_3_slave_d.t_2; + auto& t_2_a_S_t_v0_v1 =phase_3_slave_d.t_2_a_S_t_v0_v1; + auto& e =phase_3_slave_d.e; + + { + TRACK_CYCLES //244 + S_t_v0.set_mul(t, v0); + } + + { + TRACK_CYCLES //60 + if (S_negative) { + S_t_v0.negate(); + } + } + + { + TRACK_CYCLES //299 + a_S_t_v0.set_add(a, S_t_v0); + } + + { + TRACK_CYCLES //101 + t_2.set_left_shift(t, 1); + } + + { + TRACK_CYCLES //384 + t_2_a_S_t_v0.set_mul(t_2, a_S_t_v0); + } + + { + TRACK_CYCLES //666 + t_2_a_S_t_v0_v1.set_divide_exact(t_2_a_S_t_v0, v1); + } + + { + TRACK_CYCLES //353 + e.set_add(b, t_2_a_S_t_v0_v1); + e.negate(); + } + + 
return true; + } + + // + // ========================================================================================================================== + // + + //phase 4, master side: sets corruption_flag and returns false unless gcd_1_0 ended with a==1 and b==0 + //otherwise v0_2_h=v0_2*h, f=e-v0_2_h, B=f mod A_2, then A.abs(); updates b_higher_magnitude_than_a, flips ab_index and + //increments num_valid_iterations; also returns false if a fence fails + bool phase_4_master() { + { + TRACK_CYCLES //79 + if (!c_thread_state.fence(counter_start_phase_3)) { + TRACK_CYCLES_ABORT + return false; + } + } + + const auto& v0_2=phase_2_slave_d.v0_2; + const auto& h=phase_2_master_d.h; + const auto& A_2=phase_3_master_d.A_2; + + auto& v0_2_h=phase_4_master_d.v0_2_h; + auto& f =phase_4_master_d.f; + auto& A =phase_start.A(); + auto& B =phase_start.B(); + auto& b_higher_magnitude_than_a=phase_start.b_higher_magnitude_than_a; + auto& ab_index=phase_start.ab_index; + auto& num_valid_iterations=phase_start.num_valid_iterations; + + { + TRACK_CYCLES //177 + + auto& gcd_1_0=phase_0_master_d.gcd_1_0; + + if (gcd_1_0.get_a_end()!=uint64(1ull)) { + assert(!is_vdf_test); + phase_start.corruption_flag=true; + return false; + } + + if (gcd_1_0.get_b_end().sgn()!=0) { + assert(!is_vdf_test); + phase_start.corruption_flag=true; + return false; + } + } + + { + TRACK_CYCLES //318 + v0_2_h.set_mul(v0_2, h); + } + + { + TRACK_CYCLES //211 + if (!c_thread_state.fence(counter_start_phase_4)) { + TRACK_CYCLES_ABORT + return false; + } + } + const auto& e=phase_3_slave_d.e; prefetch_read(e); + + { + TRACK_CYCLES //192 + f.set_sub(e, v0_2_h); + } + + { + TRACK_CYCLES //430 + B.set_mod(f, A_2); + } + + { + TRACK_CYCLES //80 + A.abs(); + } + + { + TRACK_CYCLES //94 + b_higher_magnitude_than_a=(B.compare_abs(A)>=0); + } + + ab_index=1-ab_index; + ++num_valid_iterations; + + //phase_start.wjba=phase_start.a(); + //phase_start.wjbb=phase_start.b(); + + return true; + } + //phase 4, slave side: no work; always returns true + bool phase_4_slave() { + + return true; + } + + // + // ========================================================================================================================== + // + + static const int num_phases=5; + static const int counter_end=counter_start_phase_5; //added to counter_start to get the next counter + + void 
init(const integer& t_D, const integer& t_L, const integer& t_a, const integer& t_b) { + //stores D and L (plus array forms of zero and L for the gcd code), resets ab_index, num_valid_iterations and corruption_flag, + //then loads the starting a and b and records whether |b|>=|a| + int2x zero; + zero=uint64(0ull); + + phase_constant.D=t_D.impl; + phase_constant.L=t_L.impl; + phase_constant.gcd_zero=zero.to_array(); + phase_constant.gcd_L=phase_constant.L.to_array(); + + phase_start.ab_index=0; + phase_start.num_valid_iterations=0; + phase_start.corruption_flag=false; + + auto& a=phase_start.a(); + auto& b=phase_start.b(); + + a=t_a.impl; + b=t_b.impl; + + phase_start.b_higher_magnitude_than_a=(b.compare_abs(a)>=0); + } + + //maps a phase number to its counter_start_phase_N constant; phase must be in 0..4 (it is not range checked) + int get_counter_start(int phase) { + int res[]={counter_start_phase_0, counter_start_phase_1, counter_start_phase_2, counter_start_phase_3, counter_start_phase_4}; + return res[phase]; + } + + //calls phase_N_slave or phase_N_master through a member function pointer table and returns its result + //phase must be in 0..4 (it is not range checked) + bool call_phase(int phase, bool is_slave) { + decltype(&square_state_type::phase_0_master) funcs_master[]={ + &square_state_type::phase_0_master, + &square_state_type::phase_1_master, + &square_state_type::phase_2_master, + &square_state_type::phase_3_master, + &square_state_type::phase_4_master + }; + + decltype(&square_state_type::phase_0_slave) funcs_slave[]={ + &square_state_type::phase_0_slave, + &square_state_type::phase_1_slave, + &square_state_type::phase_2_slave, + &square_state_type::phase_3_slave, + &square_state_type::phase_4_slave + }; + + return (this->*((is_slave)? funcs_slave : funcs_master)[phase])(); + } + + //returns true for every phase except phase 1 + bool single_thread_master_first(int phase) { + //for gcds, the thread calling gcd_unsigned has to go first + return phase!=1; + } + + //if this returns false then there is corruption and the inputs are unchanged + //if it returns true, the inputs have been advanced by num_iterations + //num_iterations can be less than the requested number if there was an error (e.g. 
large gcd quotient, thread spun for too long, etc) + //this will set num_iterations to ~uint64(0) if the return value is false + bool assign(integer& t_a, integer& t_b, integer& t_c, uint64& num_iterations) { + num_iterations=phase_start.num_valid_iterations; + + if (phase_start.corruption_flag) { + assert(!is_vdf_test); + num_iterations=~uint64(0); + return false; + } + + const auto& a=phase_start.a(); + const auto& b=phase_start.b(); + + const auto& D=phase_constant.D; + + auto& b_b =phase_0_slave_d.b_b; + auto& a_4 =phase_0_slave_d.a_4; + auto& b_b_D =phase_0_slave_d.b_b_D; + auto& c =phase_0_slave_d.c; + auto& c_remainder =phase_0_slave_d.c_remainder; + + //validation: recompute c=(b^2-D)/(4a) with a floor division; a nonzero remainder or a negative a or c is rejected + b_b.set_mul(b, b); + a_4.set_left_shift(a, 2); + b_b_D.set_sub(b_b, D); + + c.set_divide_floor(b_b_D, a_4, c_remainder); + if (c_remainder.sgn()!=0 || a.sgn()<0 || c.sgn()<0) { + assert(!is_vdf_test); + num_iterations=~uint64(0); + return false; + } + + mpz_set(t_a.impl, a); + mpz_set(t_b.impl, b); + mpz_set(t_c.impl, c); + + return true; + } + /* + bool assignwjb(integer& t_a, integer& t_b, integer& t_c, uint64& num_iterations) { + + int4x b_b; // b_b=b^2 + int2x a_4; // a_4=4a=a<<2 + int4x b_b_D; // b_b_D=b^2-D + int4x c; // c=(b^2-D)/(4a) + int4x c_remainder; //only assigned if c is being validated + + num_iterations=phase_start.num_valid_iterations; + + if (phase_start.corruption_flag) { + assert(!is_vdf_test); + num_iterations=~uint64(0); + return false; + } + + const auto& a=phase_start.wjba; + const auto& b=phase_start.wjbb; + + const auto& D=phase_constant.D; + + b_b.set_mul(b, b); + a_4.set_left_shift(a, 2); + b_b_D.set_sub(b_b, D); + + c.set_divide_floor(b_b_D, a_4, c_remainder); + if (c_remainder.sgn()!=0 || a.sgn()<0 || c.sgn()<0) { + assert(!is_vdf_test); + num_iterations=~uint64(0); + return false; + } + + mpz_set(t_a.impl, a); + mpz_set(t_b.impl, b); + mpz_set(t_c.impl, c); + + return true; + }*/ +}; + +//values for the type argument of INUDUPLListener::OnIteration +#define NL_SQUARESTATE 1 +#define NL_FORM 2 + +class INUDUPLListener{ +public: + virtual 
void OnIteration(int type, void *data, uint64 iteration)=0; +}; + +//this should never have an infinite loop +//the gcd loops all have maximum counters after which they'll error out, and the thread_state loops also have a maximum spin counter +void repeated_square_fast_work(square_state_type &square_state,bool is_slave, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { + c_thread_state.reset(); + c_thread_state.is_slave=is_slave; + c_thread_state.pairindex=square_state.pairindex; + + bool has_error=false; + + for (uint64 iter=0;iterOnIteration(NL_SQUARESTATE,&square_state,base+iter); + } + } + + #ifdef ENABLE_TRACK_CYCLES + { + if (is_slave) { + sleep(1); + } + + print( "track cycles is_slave:", is_slave ); + TRACK_CYCLES_OUTPUT_STATS + print( "" ); + print( "" ); + print( "" ); + } + #endif +} + +uint64 repeated_square_fast_multithread(square_state_type &square_state, form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { + master_counter[square_state.pairindex].reset(); + slave_counter[square_state.pairindex].reset(); + + square_state.init(D, L, f.a, f.b); + memory_barrier(); + + thread slave_thread(repeated_square_fast_work, std::ref(square_state), false, base, iterations, std::ref(nuduplListener)); + + repeated_square_fast_work(square_state, true, base, iterations, nuduplListener); + + slave_thread.join(); //slave thread can't get stuck; is supposed to error out instead + memory_barrier(); + + uint64 res; + square_state.assign(f.a, f.b, f.c, res); + + return res; +} + +uint64 repeated_square_fast_single_thread(square_state_type &square_state, form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { + master_counter[square_state.pairindex].reset(); + slave_counter[square_state.pairindex].reset(); + + square_state.init(D, L, f.a, f.b); + + thread_state thread_state_master; + thread_state thread_state_slave; + + 
thread_state_master.reset(); + thread_state_master.is_slave=false; + thread_state_master.pairindex=square_state.pairindex; + + thread_state_slave.reset(); + thread_state_slave.is_slave=true; + thread_state_slave.pairindex=square_state.pairindex; + + bool has_error=false; + + for (uint64 iter=0;iterOnIteration(NL_SQUARESTATE,&square_state,base+iter); + } + + uint64 res; + square_state.assign(f.a, f.b, f.c, res); //sets res to ~uint64(0) and leaves f unchanged if there is corruption + + #ifdef ENABLE_TRACK_CYCLES + print( "stats both threads:" ); + TRACK_CYCLES_OUTPUT_STATS + #endif + + return res; +} + +//returns number of iterations performed +//if this returns ~0, the discriminant was invalid and the inputs are unchanged +uint64 repeated_square_fast(square_state_type &square_state,form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { + + if (enable_threads) { + return repeated_square_fast_multithread(square_state, f, D, L, base, iterations, nuduplListener); + } else { + return repeated_square_fast_single_thread(square_state, f, D, L, base, iterations, nuduplListener); + } +} diff --git a/lib/chiavdf/fast_vdf/vdf_new.cpp b/lib/chiavdf/fast_vdf/vdf_new.cpp new file mode 100644 index 00000000..1a8632d4 --- /dev/null +++ b/lib/chiavdf/fast_vdf/vdf_new.cpp @@ -0,0 +1,30 @@ +/*#include "include.h" + +#include "integer.h" + +#include "vdf_new.h" + +int main(int argc, char** argv) { + parse_args(argc, argv); + + integer a; + integer b; + integer c; + generator_for_discriminant(arg_discriminant, a, b, c); + + for (int x=0;x(argv[1]); + } else + if (argc==3) { + arg_discriminant=integer(argv[1]); + arg_iterations=from_string(argv[2]); + } else { + assert(false); + } +}**/ \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/vdf_original.h b/lib/chiavdf/fast_vdf/vdf_original.h new file mode 100644 index 00000000..3d6c9ddc --- /dev/null +++ b/lib/chiavdf/fast_vdf/vdf_original.h @@ -0,0 +1,321 @@ +/** 
+Copyright 2018 Chia Network Inc + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +***/ + +namespace vdf_original { + struct form { + // f(x, y) = ax^2 + bxy + cy^2 + mpz_t a; + mpz_t b; + mpz_t c; + + //mpz_t d; // discriminant + }; + + ostream& operator<<(ostream& os, const form& f) { + return os << "a: " << f.a << endl << "b: " << f.b << endl << "c: " << f.c << endl; + } + + // Scratch variables and the result form f3, shared by every function in this namespace + mpz_t negative_a, r, denom, old_b, ra, s, x, old_a, g, d, e, q, w, u, a, + b, m, k, mu, v, sigma, lambda, h, t, l, j; + form f3; + + // Normalizes f in place so that -a < b <= a + inline void normalize(form& f) { + mpz_neg(negative_a, f.a); + if (mpz_cmp(f.b, negative_a) > 0 && mpz_cmp(f.b, f.a) <= 0) { + // Already normalized + return; + } + // r = (a - b) / 2a + // a = a + // b = b + 2ra + // c = ar^2 + br + c + mpz_sub(r, f.a, f.b); + + mpz_mul_ui(denom, f.a, 2); + + // r = (a-b) / 2a + mpz_fdiv_q(r, r, denom); + + mpz_set(old_b, f.b); + + mpz_mul(ra, r, f.a); + mpz_add(f.b, f.b, ra); + mpz_add(f.b, f.b, ra); + + // c += ar^2 + mpz_mul(ra, ra, r); + mpz_add(f.c, f.c, ra); + + // c += rb + mpz_set(ra, r); + mpz_mul(ra, ra, old_b); + mpz_add(f.c, f.c, ra); + } + + // Reduces f in place: normalizes, then repeats the step below while a > c, or a == c and b < 0 + inline void reduce(form& f) { + normalize(f); + while ((mpz_cmp(f.a, f.c) > 0) || + (mpz_cmp(f.a, f.c) == 0 && mpz_cmp_si(f.b, 0) < 0)) { + // s = (c + b) / 2c + mpz_add(s, f.c, f.b); + + // x = 2c + mpz_mul_ui(x, f.c, 2); + mpz_fdiv_q(s, s, x); + + mpz_set(old_a, f.a); + mpz_set(old_b, f.b); + + // a = c, b = -b + mpz_set(f.a, f.c); + mpz_neg(f.b, f.b); + + // x = 2sc + mpz_mul(x, s, f.c); + mpz_mul_ui(x, x, 2); + + // b += 2sc + 
mpz_add(f.b, f.b, x); + + // c = cs^2 + mpz_mul(f.c, f.c, s); + mpz_mul(f.c, f.c, s); + + // x = bs + mpz_mul(x, old_b, s); + + // c -= bs + mpz_sub(f.c, f.c, x); + + // c += a + mpz_add(f.c, f.c, old_a); + } + normalize(f); + } + + // Builds the form a = 2, b = 1, c = (b*b - d) / 4a for the discriminant *d and returns it reduced + inline form generator_for_discriminant(mpz_t* d) { + form x; + mpz_init_set_ui(x.a, 2); + mpz_init_set_ui(x.b, 1); + mpz_init(x.c); + //mpz_init_set(x.d, *d); + + // c = b*b - d + mpz_mul(x.c, x.b, x.b); + mpz_sub(x.c, x.c, *d); + + // denom = 4a + mpz_mul_ui(denom, x.a, 4); + + mpz_fdiv_q(x.c, x.c, denom); + reduce(x); + return x; + } + + // Returns mu and v, solving for x: ax = b mod m + // such that x = mu + v*n (n ranges over all integers). Assumes that mu and v are initialized. + // Returns 0 on success, -1 on failure + inline int solve_linear_congruence(mpz_t& mu, mpz_t& v, mpz_t& a, mpz_t& b, mpz_t& m) { + // g = gcd(a, m), and da + em = g + mpz_gcdext(g, d, e, a, m); + + // q = b/g, r = b % g + mpz_fdiv_qr(q, r, b, g); + + if (mpz_cmp_ui(r, 0) != 0) { + // No solution, return error. Optimize out for speed.. 
+ cout << "No solution to congruence" << endl; + return -1; + } + + mpz_mul(mu, q, d); + mpz_mod(mu, mu, m); + + mpz_fdiv_q(v, m, g); + return 0; + } + + // Faster version without check, and without returning v; always returns 0 + inline int solve_linear_congruence(mpz_t& mu, mpz_t& a, mpz_t& b, mpz_t& m) { + mpz_gcdext(g, d, e, a, m); + mpz_fdiv_q(q, b, g); + mpz_mul(mu, q, d); + mpz_mod(mu, mu, m); + return 0; + } + + // Takes the gcd of three numbers + inline void three_gcd(mpz_t& ret, mpz_t& a, mpz_t& b, mpz_t& c) { + mpz_gcd(ret, a, b); + mpz_gcd(ret, ret, c); + } + + // Composes f1 and f2 into the shared form f3, reduces it and returns &f3 + // (the result is overwritten by the next multiply() or square() call) + inline form* multiply(form &f1, form &f2) { + //assert(mpz_cmp(f1.d, f2.d) == 0); + + // g = (b1 + b2) / 2 + mpz_add(g, f1.b, f2.b); + mpz_fdiv_q_ui(g, g, 2); + + + // h = (b2 - b1) / 2 + mpz_sub(h, f2.b, f1.b); + mpz_fdiv_q_ui(h, h, 2); + + // w = gcd(a1, a2, g) + three_gcd(w, f1.a, f2.a, g); + + // j = w + mpz_set(j, w); + + // r = 0 + mpz_set_ui(r, 0); + + // s = a1/w + mpz_fdiv_q(s, f1.a, w); + + // t = a2/w + mpz_fdiv_q(t, f2.a, w); + + // u = g/w + mpz_fdiv_q(u, g, w); + + // solve (tu)k = (hu + sc1) mod st, of the form k = mu + vn + + // a = tu + mpz_mul(a, t, u); + + // b = hu + sc1 + mpz_mul(b, h, u); + mpz_mul(m, s, f1.c); + mpz_add(b, b, m); + + // m = st + mpz_mul(m, s, t); + + int ret = solve_linear_congruence(mu, v, a, b, m); + + assert(ret == 0); + + // solve (tv)n = (h - t * mu) mod s, of the form n = lambda + sigma n' + + // a = tv + mpz_mul(a, t, v); + + // b = h - t * mu + mpz_mul(m, t, mu); // use m as a temp variable + mpz_sub(b, h, m); + + // m = s + mpz_set(m, s); + + ret = solve_linear_congruence(lambda, sigma, a, b, m); + assert(ret == 0); + + // k = mu + v*lambda + mpz_mul(a, v, lambda); // use a as a temp variable + + mpz_add(k, mu, a); + + // l = (k*t - h) / s + mpz_mul(l, k, t); + mpz_sub(l, l, h); + mpz_fdiv_q(l, l, s); + + // m = (tuk - hu - cs) / st + mpz_mul(m, t, u); + mpz_mul(m, m, k); + mpz_mul(a, h, u); // use a as a temp variable + mpz_sub(m, m, a); + mpz_mul(a, f1.c, s); // use 
a as a temp variable + mpz_sub(m, m, a); + mpz_mul(a, s, t); // use a as a temp variable + mpz_fdiv_q(m, m, a); + + // A = st - ru + mpz_mul(f3.a, s, t); + mpz_mul(a, r, u); // use a as a temp variable + mpz_sub(f3.a, f3.a, a); + + // B = ju + mr - (kt + ls) + mpz_mul(f3.b, j, u); + mpz_mul(a, m, r); // use a as a temp variable + mpz_add(f3.b, f3.b, a); + mpz_mul(a, k, t); // use a as a temp variable + mpz_sub(f3.b, f3.b, a); + mpz_mul(a, l, s); // use a as a temp variable + mpz_sub(f3.b, f3.b, a); + + // C = kl - jm + mpz_mul(f3.c, k, l); + mpz_mul(a, j, m); + mpz_sub(f3.c, f3.c, a); + + //mpz_set(f3.d, f1.d); + + reduce(f3); + return &f3; + } + + /** + * This algorithm is the same as the composition/multiply algorithm, + * but simplified to where both inputs are equal (squaring). It also + * assumes that the discriminant is a negative prime. Algorithm: + * + * 1. solve for mu: b(mu) = c mod a + * 2. A = a^2 + * B = b - 2a * mu + * C = mu^2 - (b * mu - c)/a + * 3. reduce f(A, B, C) + * + * The result is written to the shared form f3 and &f3 is returned. + **/ + inline form* square(form &f1) { + int ret = solve_linear_congruence(mu, f1.b, f1.c, f1.a); + assert(ret == 0); + + // m = (b * mu - c) / a + mpz_mul(m, f1.b, mu); + mpz_sub(m, m, f1.c); + mpz_fdiv_q(m, m, f1.a); + + // New a (a is saved first because f1 may be f3 itself) + mpz_set(old_a, f1.a); + mpz_mul(f3.a, f1.a, f1.a); + + // New b + mpz_mul(a, mu, old_a); + mpz_mul_ui(a, a, 2); + mpz_sub(f3.b, f1.b, a); + + // New c + mpz_mul(f3.c, mu, mu); + mpz_sub(f3.c, f3.c, m); + //mpz_set(f3.d, f1.d); + reduce(f3); + return &f3; + } + + // Performs the VDF squaring iterations + // The returned form is a struct copy of *f; after at least one iteration f points at f3 + inline form repeated_square(form *f, uint64_t iterations) { + for (uint64_t i=0; i < iterations; i++) { + f = square(*f); + } + return *f; + } + + // Initializes every scratch mpz_t declared at the top of this namespace, plus f3. + // GMP requires mpz_init before first use; h, t, l and j are listed because multiply() writes to them. + void init() { + mpz_inits(negative_a, r, denom, old_a, old_b, ra, s, x, g, d, e, q, w, m, + u, a, b, k, mu, v, sigma, lambda, h, t, l, j, f3.a, f3.b, f3.c, //f3.d, + NULL); + } +} diff --git a/lib/chiavdf/fast_vdf/vdf_test.cpp b/lib/chiavdf/fast_vdf/vdf_test.cpp new file mode 100644 index 00000000..74a56afb --- /dev/null +++ 
b/lib/chiavdf/fast_vdf/vdf_test.cpp @@ -0,0 +1,438 @@ +#include "include.h" + +#include "parameters.h" + +#include "bit_manipulation.h" +#include "double_utility.h" +#include "integer.h" + +#include "asm_main.h" + +#include "vdf_original.h" + +#include "vdf_new.h" + +#include "gpu_integer.h" +#include "gpu_integer_divide.h" + +#include "gcd_base.h" +#include "gpu_integer_gcd.h" + +#include "vdf_test.h" + +#if VDF_MODE==0 + const bool test_correctness=false; + const bool assert_on_rollback=false; + const bool debug_rollback=false; + const int repeated_square_checkpoint_interval=1<<10; //should be a power of 2 +#endif + +#if VDF_MODE==1 + const bool test_correctness=true; + const bool assert_on_rollback=true; + const bool debug_rollback=false; + const int repeated_square_checkpoint_interval=1<<10; +#endif + +using namespace std; +//using simd_integer_namespace::track_cycles_test; + +//each thread updates a sequence number. the write is atomic on x86 +//it also has an array of outputs that is append-only +//it will generate an output, do a mfence, then increment the sequence number non-atomically (since it is the only writer) +//it can also wait for another thread's outputs by spinning on its sequence number (with a timeout) +//error states: +//-any thread can change its sequence number to "error" which is the highest uint64 value +//-it will do this if any operation fails or if it spins too long waiting for another thread's output +//-also, the spin loop will error out if the other thread's sequence number is "error". this will make the spinning thread's sequence +// number also be "error" +//-once a thread has become "error", it will exit the code. the slave threads will wait on the barrier and the main thread will just +// exit the squaring function with a "false" output +//-the error state is the global sequence number with the msb set. 
this allows the sequence number to not be reset across calls +//will just make every state have a 48 bit global sequence number (enough for 22 years) plus a 16 bit local sequence number +//-the last local sequence number is the error state +//-there is no finish state since each state update will change the sequence number to a new, unique sequence number +//to start the squaring, the main thread will output A and B then increase its sequence number to the next global sequence number +//-slave threads will wait on this when they are done squaring or have outputted the error state +//-it is assumed that the main thread synchronizes with each slave thread to consume its output +//can probably write the synchronization code in c++ then because of how simple it is +//this is trivial to implement and should be reliable +//if the gcd generates too many matrices (more than 32 or so), it should generate an error +//need to write each output to a separate cache line +//will use the slave core for: cofactors for both gcds, calculate C at the start of the squaring, calculate (-v2*c)%a as the v2 +// cofactor is being generated. this will use <0,-c> as the initial state instead of <0,1> and will also reduce everything modulo a +// after each matrix multiplication. it will also calculate C first. +//the slave core will then calculate the partial gcd and the master core will calculate the cofactors +//once the master core has calculated all of the cofactors, it will also know the final values of a_copy and k_copy from the +// slave core. the slave core is done now +//the master core will calculate the new values of A and B on its own. this can't be parallelized + +//-have an asm gcd. nothing else is asm. will use gmp for everything else +//-the asm gcd takes unsigned inputs where a>=b. it returns unsigned outputs. 
its inputs are zero-padded to a fixed size +//-it modifies its inputs and returns a sequence of cofactor matrices +//-gmp has some utility functions to make this work easily. gmp can also calculate the new size. the resulting sign is always + +//--the sequence is outputted to a fixed size array of cache lines. there is also an output counter which should initially be 0 +//-- and can be any pointer. the msb of the output counter is used to indicate the last output +//-the slave core is still used +//-gmp is close to optimal for the pentium machine so will just use it. for the fast machine, can use avx-512 if i have time. the gmp +//- division is still used but only to find the approximate inverse. the result quotient should be >= the actual quotient for exact +//- division to still work + +//generic_stats track_cycles_total; + +//squares f in place using the reference implementation vdf_original::square +//f_in receives struct copies of f's a, b and c (no mpz_init); the result form is copied back into f with mpz_set +void square_original(form& f) { + vdf_original::form f_in; + f_in.a[0]=f.a.impl[0]; + f_in.b[0]=f.b.impl[0]; + f_in.c[0]=f.c.impl[0]; + + vdf_original::form& f_res=*vdf_original::square(f_in); + + mpz_set(f.a.impl, f_res.a); + mpz_set(f.b.impl, f_res.b); + mpz_set(f.c.impl, f_res.c); +} + +bool square_fast(form& f, const integer& d, const integer& L, int current_iteration) { + form f_copy; + if (test_correctness) { + f_copy=f; + } + + bool success=false; + + const int max_bits_ab=max_bits_base + num_extra_bits_ab; + const int max_bits_c=max_bits_base + num_extra_bits_c; + + //sometimes the nudupl code won't reduce the output all the way. 
if it has too many bits it will get reduced by calling + // square_original + if (f.a.num_bits() ", checkpoint_iteration ); + } + + current_iteration=checkpoint_iteration; + current=checkpoint; + error_mode=true; + did_rollback=true; + assert(!assert_on_rollback); + } + } + + void advance_error() { + square_original(current); + ++current_iteration; + } + + void advance() { + bool did_rollback=false; + if (error_mode) { + advance_error(); + } else { + advance_fast(did_rollback); + } + + if (!did_rollback && is_checkpoint()) { + checkpoint_iteration=current_iteration; + checkpoint=current; + error_mode=false; + } + } + + repeated_square(integer t_d, form initial, int64 t_num_iterations) { + d=t_d; + L=root(-d, 4); + //L=integer(1)<<512; + + checkpoint=initial; + current=initial; + num_iterations=t_num_iterations; + + while (current_iteration, 2> ab={fixed_integer(ab_start_0), fixed_integer(ab_start_1)}; + //array, 2> uv; + //int parity; + //gcd_unsigned( + //ab, + //uv, + //parity, + //fixed_integer(threshold) + //); + + //todo assert(false); + + //todo //set up thread affinity. 
make sure they are not hyperthreads on the same core if possible + + set_rounding_mode(); + + vdf_original::init(); + + integer d(argv[1]); + int64 num_iterations=from_string(argv[2]); + form d_initial=form::generator(d); + + //integer d( + //"-0xaf0806241ecbc630fbbfd0c9d61c257c40a185e8cab313041cf029d6f070d58ecbc6c906df53ecf0dd4497b0753ccdbce2ebd9c80ae0032acce89096af642dd8c008403dd989ee5c1262545004fdcd7acf47908b983bc5fed17889030f0138e10787a8493e95ca86649ae8208e4a70c05772e25f9ac901a399529de12910a7a2c" + //"3376292be9dba600fd89910aeccc14432b6e45c0456f41c177bb736915cad3332a74e25b3993f3e44728dc2bd13180132c5fb88f0490aeb96b2afca655c13dd9ab8874035e26dab16b6aad2d584a2d35ae0eaf00df4e94ab39fe8a3d5837dcab204c46d7a7b97b0c702d8be98c50e1bf8b649b5b6194fc3bae6180d2dd24d9f" + //); + //int64 num_iterations=1000; + + //form d_initial=form::from_abd( + //integer( + //"0x6a8f34028dad0dec9e765a5d761b9b041733e86d849b507ba346052f7b768a18d0283597b581e4b9e705dccc3d5197c66186940d5bdbee00784f51dc0f193cedf619e149a7b0fd48b8c4eb6d4bf925a9d634e138254f22007337415cea377655a0c2832592db32ce9b61d4937dcffd13c33bdf1ac5164a974cd9d61b14c81820" + //), + //integer( + //"0x71c24869eed37be508e1751c21f49fcf16a68b42dec10cedf7376a036280f48a2c4b123d5f918ed4affa612a8dbacb4e6b5cdcaad439f3a5f0ab5a35ab6901025307c2ceaf54ab3bae5daae870817527dceb5fef9f7d6766a84bf843d9de74966fbd2bbad0200323876b90a3f4d9d135876a09f51225f126dd180412c658f4f" + //), + //d + //); + + repeated_square c_square(d, d_initial, num_iterations); + + cout << c_square.current.a.impl << "\n"; + cout << c_square.current.b.impl; + + //track_max.output(512); + + //if (enable_track_cycles) { + //print( "" ); + //print( "" ); + + //for (int x=0;x tracking_data; + + for (int x=0;xsecond!=0) { + base=base_i->second; + } + + print(c.first, c.second/base, " ", base); + } + } + #endif +} + + +/*void square_fast_impl(square_state& _) { + const int max_bits_ab=max_bits_base + num_extra_bits_ab; + + //all divisions are exact + + //sometimes the nudupl code won't 
reduce the output all the way. if it has too many bits it will get reduced by calling + // square_original + bool too_many_bits; + too_many_bits=(_.a.num_bits()>max_bits_ab || _.b.num_bits()>max_bits_ab); + if (too_many_bits) { + return false; + } + + //if a<=L then this will return false; usually a has twice as many limbs as L + bool a_too_small; + a_too_small=(_.a.num_limbs()<=_.L.num_limbs()+1); + if (a_too_small) { + return false; + } + + //only b can be negative + //neither a or b can be 0; d=b^2-4ac is prime. if b=0, then d=-4ac=composite. if a=0, then d=b^2; d>=0 + //no constraints on which is greater + //the gcd result is 1 because d=b^2-4ac ; assume gcd(a,b)!=1 ; a=A*s ; b=B*s ; s=gcd(a,b)!=1 ; d = (Bs)^2-4Asc + // d = B^2*s^2 - 4sac = s(B^2*s - 4ac) ; d is not prime. d is supposed to be prime so this can't happen + //the quadratic form might not be reduced all the way so it's possible for |b|>a. need to swap the inputs then + // (they are copied anyway) + // + // U0*b + V0*a = 1 + // U1*b + V1*a = 0 + // + // U0*b === 1 mod a + // U1*b === 0 mod a + U0=gcd(b, a, 0).u0; + + c=(b*b-D)/(a<<2); + + //start with <0,c> or which is padded to 18 limbs so that the multiplications by 64 bits are exact (same with sums) + //once the new values of uv are calculated, need to reduce modulo a, which is 17 limbs and has been normalized already + //-the normalization also left shifted c + //reducing modulo a only looks at the first couple of limbs so it has the same efficiency as doing it at the end + //the modulo result is always nonnegative + // + // k+q*a=-U0*c + k=(-U0*c)%a; + + // a>L so at least one input is >L initially + //when this terminates, one input is >L and one is <=L + //k is reduced modulo a, so |k|<|a| + //a is positive + //the result of mpz_mod is always nonnegative so k is nonnegative + // + // u0*a + v0*k = s ; s>L + // u1*a + v1*k = t ; t<=L + // v0*k === s mod a + // v1*k === t mod a + auto gcd2=gcd(a, k, L); + v0=gcd2.v0 + v1=gcd2.v1 + s=gcd2.a + 
t=gcd2.b + + // b*t + c*v1 === b*v1*k + c*v1 === v1(b*k+c) === v1(-U0*c*b+c) === c*v1*(1-U0*b) === c*v1*(1-1) === 0 mod a + // b*t + c*v1 = b*(u0*a + v1*k) + c*v1 = b*u0*a + v1(b*k + c) = b*u0*a + v1(c - b*(U0*c+q*a)) + // = b*u0*a + v1(c - b*U0*c - b*q*a) = b*u0*a + v1(c - (1-V0*a)*c - b*q*a) = b*u0*a + v1(V0*a*c - b*q*a) + // = a*(b*u0 + v1(V0*c - b*q)) + // ((b*t+c*v1)/a) = b*u0 + v1(V0*c - b*q) ; this is slower + // + // S = -1 if v1<=0, else 1 + // h = S*(b*t+c*v1)/a + // j = t*t*S + // + // A=t*t+v1*((b*t+c*v1)/a) + // A = j + v1*h + A=t*t+v1*((b*t+c*v1)/a); + + if (v1<=0) { + A=-A; + } + + // e = 2t*(a + S*t*v0)/v1 + // e' = b - e + // f = e' - 2*v0*h + // + // (2*a*t + 2*A*v0)/v1 + // = (2*a*t + 2*j*v0 + 2*v1*v0*h)/v1 + // = (2*a*t + 2*j*v0)/v1 + 2*v0*h + // = (2*a*t + 2*S*t*t*v0)/v1 + 2*v0*h + // = 2t*(a + S*t*v0)/v1 + 2*v0*h + // = e + 2*v0*h + // + // B = ( b - ((a*t+A*v0)*2)/v1 )%(A*2) + // = ( b - e - 2*v0*h )%(A*2) + // = ( e' - 2*v0*h )%(A*2) + // = f % (2A) + B=( b - ((a*t+A*v0)*2)/v1 )%(A*2); + + A=abs(A) + + return true; +} */ \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/vdf_test.h b/lib/chiavdf/fast_vdf/vdf_test.h new file mode 100644 index 00000000..1520f254 --- /dev/null +++ b/lib/chiavdf/fast_vdf/vdf_test.h @@ -0,0 +1,316 @@ +bool square_fast_impl(form& f, const integer& D, const integer& L, int current_iteration) { + const int max_bits_ab=max_bits_base + num_extra_bits_ab; + const int max_bits_c=max_bits_base + num_extra_bits_ab*2; + + //sometimes the nudupl code won't reduce the output all the way. 
if it has too many bits it will get reduced by calling + // square_original + if (!(f.a.num_bits() a_int(a); + fixed_integer b_int(b); + fixed_integer c_int(c); + fixed_integer L_int(L); //actual size is 8 limbs; padded to 17 + fixed_integer D_int(D); //padded by an extra limb + + //2048 bit D, basis is 512; one limb is 0.125; one bit is 0.002 + //TRACK_MAX(a); // a, 2.00585 <= bits (multiple of basis), 0 <= is negative + //TRACK_MAX(b); // b, 2.00585, 0 + //TRACK_MAX(c); // c, 2.03125, 0 + + //can just look at the top couple limbs of a for this + assert((a<=L)==(a_int<=L_int)); + if (a_int<=L_int) { + return false; + } + + integer v2; + fixed_integer v2_int; + { + gcd_res g=gcd(b, a); + assert(g.gcd==1); + v2=g.s; + + //only b can be negative + //neither a or b can be 0; d=b^2-4ac is prime. if b=0, then d=-4ac=composite. if a=0, then d=b^2; d>=0 + //no constraints on which is greater + v2_int=gcd(b_int, a_int, fixed_integer(), true).s; + assert(integer(v2_int)==v2); + } + //TRACK_MAX(v2); // v2, 2.00195, 1 + + //todo + //start with <0,c> or which is padded to 18 limbs so that the multiplications by 64 bits are exact (same with sums) + //once the new values of uv are calculated, need to reduce modulo a, which is 17 limbs and has been normalized already + //-the normalization also left shifted c + //reducing modulo a only looks at the first couple of limbs so it has the same efficiency as doing it at the end + //-it does require computing the inverse of a a bunch of times which is slow. 
this will probably slow it down by 2x-4x + //--can avoid this by only reducing every couple of iterations + integer k=(-v2*c)%a; + fixed_integer k_int=fixed_integer(-v2_int*c_int)%a_int; + assert(integer(k_int)==k); + + //print( "v2", v2.to_string() ); + //print( "k", k.to_string() ); + + //TRACK_MAX(v2*c); // v2*c, 4.0039, 1 + //TRACK_MAX(k); // k, 2.0039, 0 + + integer a_copy=a; + integer k_copy=k; + integer co2; + integer co1; + xgcd_partial(co2, co1, a_copy, k_copy, L); //neither input is negative + + const bool same_cofactors=false; //gcd and xgcd_parital can return slightly different results + + fixed_integer co2_int; + fixed_integer co1_int; + fixed_integer a_copy_int; + fixed_integer k_copy_int; + { + // a>L so at least one input is >L initially + //when this terminates, one input is >L and one is <=L + auto g=gcd(a_int, k_int, L_int, false); + co2_int=-g.t; + co1_int=-g.t_2; + a_copy_int=g.gcd; + k_copy_int=g.gcd_2; + + if (same_cofactors) { + assert(integer(co2_int)==co2); + assert(integer(co1_int)==co1); + assert(integer(a_copy_int)==a_copy); + assert(integer(k_copy_int)==k_copy); + } + } + + //print( "co2", co2_int.to_integer().to_string() ); + //print( "co1", co1_int.to_integer().to_string() ); + //print( "a_copy", a_copy_int.to_integer().to_string() ); + //print( "k_copy", k_copy_int.to_integer().to_string() ); + + //todo + //can speed the following operations up with simd (including calculating C but it is done on the slave core) + //division by a can be replaced by multiplication by a inverse. this takes the top N bits of the numerator and denominator inverse + // where N is the number of bits in the result + //if this is done correctly, the calculated result withh be >= the actual result, and it will be == almost all of the time + //to detect if it is >, can calculate the remainder and see if it is too high. 
this can be done by the slave core during the + // next iteration + //most of the stuff is in registers for avx-512 + //the slave core will precalculate a inverse. it is already dividing by a to calculate c + //this would get rid of the 8x8 batched multiply but not the single limb multiply, since that is still needed for gcd + //for the cofactors which are calculated on the slave core, can use a tree matrix multiplication with the avx-512 code + //for the pentium processor, the adox instruction is banned so the single limb multiply needs to be changed + //the slave core can calculate the inverse of co1 while the master core is calculating A + //for the modulo, the quotient has about 15 bits. can probably calculate the inverse on the master core then since the division + // base case already calculates it with enough precision + //this should work for scalar code also + + //TRACK_MAX(co2); // co2, 1.00195, 1 + //TRACK_MAX(co1); // co1, 1.0039, 1 + //TRACK_MAX(a_copy); // a_copy, 1.03906, 0 + //TRACK_MAX(k_copy); // k_copy, 1, 0 + + //TRACK_MAX(k_copy*k_copy); // k_copy*k_copy, 2, 0 + //TRACK_MAX(b*k_copy); // b*k_copy, 3.0039, 0 + //TRACK_MAX(c*co1); // c*co1, 3.0039, 1 + //TRACK_MAX(b*k_copy-c*co1); // b*k_copy-c*co1, 3.00585, 1 + //TRACK_MAX((b*k_copy-c*co1)/a); // (b*k_copy-c*co1)/a, 1.02539, 1 + //TRACK_MAX(co1*((b*k_copy-c*co1)/a)); // co1*((b*k_copy-c*co1)/a), 2.00585, 1 + + integer A=k_copy*k_copy-co1*((b*k_copy-c*co1)/a); // [exact] + //TRACK_MAX(A); // A, 2.00585, 0 + + fixed_integer A_int; + { + fixed_integer k_copy_k_copy(k_copy_int*k_copy_int); + fixed_integer b_k_copy(b_int*k_copy_int); + fixed_integer c_co1(c_int*co1_int); + fixed_integer b_k_copy_c_co1(b_k_copy-c_co1); + fixed_integer t1(b_k_copy_c_co1/a_int); + fixed_integer t2(co1_int*t1); + A_int=k_copy_k_copy-t2; + + if (same_cofactors) { + assert(integer(A_int)==A); + } + } + + if (co1>=0) { + A=-A; + } + + if (!co1_int.is_negative()) { + A_int=-A_int; + } + + if (same_cofactors) { + 
assert(integer(A_int)==A); + } + + //TRACK_MAX(A); // A, 2.00585, 1 + //TRACK_MAX(a*k_copy); // a*k_copy, 3.0039, 0 + //TRACK_MAX(A*co2); // A*co2, 3.0039, 0 + //TRACK_MAX((a*k_copy-A*co2)*integer(2)); // (a*k_copy-A*co2)*integer(2), 3.00585, 1 + //TRACK_MAX(((a*k_copy-A*co2)*integer(2))/co1); // ((a*k_copy-A*co2)*integer(2))/co1, 2.03515, 1 + //TRACK_MAX(((a*k_copy-A*co2)*integer(2))/co1 - b); // ((a*k_copy-A*co2)*integer(2))/co1 - b, 2.03515, 1 + + integer B=( ((a*k_copy-A*co2)*integer(2))/co1 - b )%(A*integer(2)); //[exact] + //TRACK_MAX(B); // B, 2.00585, 0 + + fixed_integer B_int; + { + fixed_integer a_k_copy(a_int*k_copy_int); + fixed_integer A_co2(A_int*co2_int); + fixed_integer t1((a_k_copy-A_co2)<<1); + fixed_integer t2(t1/co1_int); + fixed_integer t3(t2-b_int); + + //assert(integer(a_k_copy) == a*k_copy); + //assert(integer(A_co2) == A*co2); + //assert(integer(a_k_copy-A_co2) == (a*k_copy-A*co2)); + + //print(integer(a_k_copy-A_co2).to_string()); + //print(integer(fixed_integer(a_k_copy-A_co2)<<8).to_string()); + + //assert(integer((a_k_copy-A_co2)<<1) == ((a*k_copy-A*co2)*integer(2))); + //assert(integer(t2) == ((a*k_copy-A*co2)*integer(2))/co1); + //assert(integer(t3) == ( ((a*k_copy-A*co2)*integer(2))/co1 - b )); + //assert(integer(A_int<<1) == (A*integer(2))); + B_int=t3%fixed_integer(A_int<<1); + + if (same_cofactors) { + assert(integer(B_int)==B); + } + } + + //TRACK_MAX(B*B); // B*B, 4.01171, 0 + //TRACK_MAX(B*B-D); // B*B-D, 4.01171, 0 + + integer C=((B*B-D)/A)>>2; //[division is exact; right shift is truncation towards 0; can be negative. 
right shift is exact] + + fixed_integer C_int; + { + fixed_integer B_B(B_int*B_int); + fixed_integer B_B_D(B_B-D_int); + + //calculated at the same time as the division + if (!(B_B_D%A_int).is_zero()) { + //todo //test random error injection + print( "discriminant error" ); + return false; + } + + fixed_integer t1(B_B_D/A_int); + + //assert(integer(B_B)==B*B); + //assert(integer(B_B_D)==B*B-D); + + //print(integer(t1).to_string()); + //print(((B*B-D)/A).to_string()); + + //assert(integer(t1)==((B*B-D)/A)); + + C_int=t1>>2; + + if (same_cofactors) { + assert(integer(C_int)==C); + } + } + + //TRACK_MAX(C); // C, 2.03125, 1 + + if (A<0) { + A=-A; + C=-C; + } + + A_int.set_negative(false); + C_int.set_negative(false); + + //print( "A", A_int.to_integer().to_string() ); + //print( "B", B_int.to_integer().to_string() ); + + if (same_cofactors) { + assert(integer(A_int)==A); + assert(integer(B_int)==B); + assert(integer(C_int)==C); + } + + //TRACK_MAX(A); // A, 2.00585, 0 + //TRACK_MAX(C); // C, 2.03125, 0 + + f.a=A; + f.b=B; + f.c=C; + + //print( "" ); + //print( "" ); + //print( "==========================================" ); + //print( "" ); + //print( "" ); + + // + // + + integer s=integer(a_copy_int); + integer t=integer(k_copy_int); + integer v0=-integer(co2_int); + integer v1=-integer(co1_int); + bool S_negative=(v1<=0); + + integer c_v1=c*v1; + integer b_t=b*t; + integer b_t_c_v1=b_t+c_v1; + integer h=(b*t+c*v1)/a; + if (S_negative) { + h=-h; + } + + integer v1_h=v1*h; + integer t_t_S=t*t; + if (S_negative) { + t_t_S=-t_t_S; + } + + integer v0_2=v0<<1; + integer A_=t_t_S+v1_h; + integer A_2=A_<<1; + integer S_t_v0=t*v0; + if (S_negative) { + S_t_v0=-S_t_v0; + } + + // B=( -((a*t+A*v0)*2)/v1 - b )%(A*2) + // B=( -((a*t+(t*t*S+v1*h)*v0)*2)/v1 - b )%(A*2) + // B=( -((a*t*2 + t*t*S*v0*2 + v1*v0*h*2))/v1 - b )%(A*2) + // B=( -(a*t*2 + t*t*S*v0*2)/v1 - v0*h*2 - b )%(A*2) + // B=( -(t*2(a + t*S*v0))/v1 - v0*h*2 - b )%(A*2) + + integer a_S_t_v0=a+S_t_v0; + integer 
t_2=t<<1; + integer t_2_a_S_t_v0=t_2*a_S_t_v0; + + integer t_2_a_S_t_v0_v1=t_2_a_S_t_v0/v1; + + //integer t_2_a_S_t_v0_v1=t_2*a_S_t_v0_v1; + + integer e=-t_2_a_S_t_v0_v1-b; + integer v0_2_h=v0_2*h; + integer f_=e-v0_2_h; // -(t*2*((a+S*t*v0)/v1)) - v0*h*2 - b + integer B_=f_%A_2; + A_=abs(A_); + + //print( "A_", A_.to_string() ); + //print( "B_", B_.to_string() ); + + return true; +} \ No newline at end of file diff --git a/lib/chiavdf/inkfish/proof_of_time.py b/lib/chiavdf/inkfish/proof_of_time.py index 3307c176..d4a22d22 100644 --- a/lib/chiavdf/inkfish/proof_of_time.py +++ b/lib/chiavdf/inkfish/proof_of_time.py @@ -81,7 +81,7 @@ def create_proof_of_time_nwesolowski(discriminant, x, iterations, proof = ClassGroup.from_bytes(receive_con.recv_bytes(), discriminant) p.join() - return y_2, proof_2 + serialize_proof([y_1, proof]) + return y_2, proof_2 + iterations_1.to_bytes(8, byteorder="big") + serialize_proof([y_1, proof]) def create_proof_of_time_pietrzak(discriminant, x, iterations, int_size_bits): @@ -115,9 +115,21 @@ def check_proof_of_time_wesolowski(discriminant, x, proof_blob, except Exception: return False - def check_proof_of_time_nwesolowski(discriminant, x, proof_blob, iterations, int_size_bits, recursion): + int_size = (int_size_bits + 16) >> 4 + new_proof_blob = proof_blob[:4 * int_size] + iter_list = [] + for i in range(4 * int_size, len(proof_blob), 4 * int_size + 8): + iter_list.append(int.from_bytes(proof_blob[i : (i + 8)], byteorder="big")) + new_proof_blob = new_proof_blob + proof_blob[(i + 8): (i + 8 + 4 * int_size)] + + return check_proof_of_time_nwesolowski_inner(discriminant, x, new_proof_blob, + iterations, int_size_bits, iter_list, recursion) + + +def check_proof_of_time_nwesolowski_inner(discriminant, x, proof_blob, + iterations, int_size_bits, iter_list, recursion): """ Recursive verification function for nested wesolowski. The proof blob includes the output of the VDF, along with the proof. 
The following @@ -145,14 +157,14 @@ def check_proof_of_time_nwesolowski(discriminant, x, proof_blob, assert(len(proof) % 2 == 1 and len(proof) > 2) _, _, w = proof_wesolowski.approximate_parameters(iterations) - iterations_1 = (iterations * w) // (w + 1) + iterations_1 = iter_list[-1] iterations_2 = iterations - iterations_1 ver_outer = proof_wesolowski.verify_proof(x, proof[-2], proof[-1], iterations_1) - return ver_outer and check_proof_of_time_nwesolowski(discriminant, proof[-2], + return ver_outer and check_proof_of_time_nwesolowski_inner(discriminant, proof[-2], serialize_proof([y] + proof[:-2]), - iterations_2, int_size_bits, recursion-1) + iterations_2, int_size_bits, iter_list[:-1], recursion-1) except Exception: return False @@ -187,4 +199,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -""" +""" \ No newline at end of file diff --git a/src/blockchain.py b/src/blockchain.py index 2e45fefd..69145985 100644 --- a/src/blockchain.py +++ b/src/blockchain.py @@ -344,7 +344,7 @@ class Blockchain: return False # 4. Check PoT - if not block.trunk_block.proof_of_time.is_valid(): + if not block.trunk_block.proof_of_time.is_valid() and not genesis: return False if block.body.coinbase.height != block.trunk_block.challenge.height: diff --git a/src/config/timelord.yaml b/src/config/timelord.yaml index 3145aea0..f3fb54e1 100644 --- a/src/config/timelord.yaml +++ b/src/config/timelord.yaml @@ -3,4 +3,4 @@ port: 8003 # How much recursion to use for the wesolowski VDF proof. This increases the size # of the proofs. 
-n_wesolowski: 3 +n_wesolowski: 2 diff --git a/src/timelord.py b/src/timelord.py index a5de4b7b..da9ad55c 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -21,14 +21,12 @@ from src.server.outbound_message import OutboundMessage, Delivery, Message, Node class Database: lock: Lock = Lock() challenges: Dict = {} - process_running: bool = False config = yaml.safe_load(open("src/config/timelord.yaml", "r")) log = logging.getLogger(__name__) db = Database() - @api_request async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): """ @@ -37,12 +35,49 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): a new VDF process here. But we don't know how many iterations to run for, so we run forever. """ - # TODO: stop previous processes async with db.lock: + assert(challenge_start.challenge_hash not in db.challenges) disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) - db.challenges[challenge_start.challenge_hash] = (time.time(), disc, None) - # TODO: Start a VDF process + command = (f"./lib/chiavdf/fast_vdf/vdf {disc}") + log.info(f"Executing VDF process for discriminant: {disc}") + + proc = await asyncio.create_subprocess_shell( + command, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE) + #stderr=asyncio.subprocess.PIPE) + db.challenges[challenge_start.challenge_hash] = (disc, proc) + + while True: + output = await proc.stdout.readline() + + # Signal that process finished all challenges. 
+ if (output.decode() == "0"*100 + "\n"): + await proc.wait() + async with db.lock: + del db.challenges[challenge_start.challenge_hash] + log.info(f"The process for challenge {challenge_start.challenge_hash} ended") + + stdout_bytes_io: io.BytesIO = io.BytesIO(bytes.fromhex(output[:-1].decode())) + iterations_needed = int.from_bytes(stdout_bytes_io.read(8), "big", signed=True) + + y = ClassgroupElement.parse(stdout_bytes_io) + proof_bytes: bytes = stdout_bytes_io.read() + + # Verifies our own proof just in case + proof_blob = ClassGroup.from_ab_discriminant(y.a, y.b, disc).serialize() + proof_bytes + x = ClassGroup.from_ab_discriminant(2, 1, disc) + assert check_proof_of_time_nwesolowski(disc, x, proof_blob, iterations_needed, 1024, 2) + + output = ProofOfTimeOutput(challenge_start.challenge_hash, + iterations_needed, + ClassgroupElement(y.a, y.b)) + proof_of_time = ProofOfTime(output, config['n_wesolowski'], [uint8(b) for b in proof_bytes]) + response = timelord_protocol.ProofOfTimeFinished(proof_of_time) + + log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") + yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) @api_request async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): @@ -50,10 +85,12 @@ async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): A challenge is no longer active, so stop the process for this challenge, if it exists. """ - # TODO: Stop VDF process for this challenge async with db.lock: - db.process_running = False - + if challenge_end.challenge_hash in db.challenges: + _, proc = db.challenges[challenge_end.challenge_hash] + #I'm no longer accepting new challenges, process will finish everything else smoothly. 
+ proc.stdin.write(b'0\n') + await proc.stdin.drain() @api_request async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpaceInfo): @@ -64,56 +101,8 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac """ async with db.lock: if proof_of_space_info.challenge_hash not in db.challenges: - log.warning(f"Have not seen challenge {proof_of_space_info.challenge_hash} yet.") - return - time_recvd, disc, iters = db.challenges[proof_of_space_info.challenge_hash] - if iters: - if proof_of_space_info.iterations_needed == iters: - log.warning(f"Have already seen this challenge with {proof_of_space_info.iterations_needed}\ - iterations. Ignoring.") - return - elif proof_of_space_info.iterations_needed > iters: - # TODO: don't ignore, communicate to process - log.warning(f"Too many iterations required. Already executing {iters} iters") - return - if db.process_running: - # TODO: don't ignore, start a new process - log.warning("Already have a running process. 
Ignoring.") - return - db.process_running = True - - command = (f"python -m lib.chiavdf.inkfish.cmds -t n-wesolowski -l 1024 -d {config['n_wesolowski']} " + - f"{proof_of_space_info.challenge_hash.hex()} {proof_of_space_info.iterations_needed}") - log.info(f"Executing VDF command with new process: {command}") - - process_start = time.time() - proc = await asyncio.create_subprocess_shell( - command, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE) - - stdout, stderr = await proc.communicate() - - async with db.lock: - db.process_running = False - - log.info(f"Finished executing VDF after {int((time.time() - process_start) * 1000)/1000}s") - if stderr: - log.error(f'[stderr]\n{stderr.decode()}') - stdout_bytes_io: io.BytesIO = io.BytesIO(bytes.fromhex(stdout.decode())) - - y = ClassgroupElement.parse(stdout_bytes_io) - proof_bytes: bytes = stdout_bytes_io.read() - - # Verifies our own proof just in case - proof_blob = ClassGroup.from_ab_discriminant(y.a, y.b, disc).serialize() + proof_bytes - x = ClassGroup.from_ab_discriminant(2, 1, disc) - assert check_proof_of_time_nwesolowski(disc, x, proof_blob, proof_of_space_info.iterations_needed, 1024, 3) - - output = ProofOfTimeOutput(proof_of_space_info.challenge_hash, - proof_of_space_info.iterations_needed, - ClassgroupElement(y.a, y.b)) - proof_of_time = ProofOfTime(output, config['n_wesolowski'], [uint8(b) for b in proof_bytes]) - response = timelord_protocol.ProofOfTimeFinished(proof_of_time) - - yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) + log.warn(f"Have not seen challenge {proof_of_space_info.challenge_hash} yet.") + return + _, proc = db.challenges[proof_of_space_info.challenge_hash] + proc.stdin.write((str(proof_of_space_info.iterations_needed) + "\n").encode()) + await proc.stdin.drain() From 2b6d85b4aff38abcfbafa1b2c838f6322b97da36 Mon Sep 17 00:00:00 2001 From: fchirica Date: Mon, 30 Sep 2019 18:56:11 +0300 Subject: [PATCH 02/13] 
Dummy VDF process --- lib/chiavdf/fast_vdf/vdf.cpp | 11 +++++++---- src/timelord.py | 19 +++++++++++-------- src/types/proof_of_time.py | 1 + 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 70cc8eea..1ca79149 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -650,10 +650,13 @@ Proof CreateProofOfTimeNWesolowski(integer& D, form x, int64_t num_iterations, std::mutex main_mutex; void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallback& weso) { - Proof result = CreateProofOfTimeNWesolowski(D, x, num_iterations, 0, weso, 2, 0); + //Proof result = CreateProofOfTimeNWesolowski(D, x, num_iterations, 0, weso, 2, 0); + std::this_thread::sleep_for (std::chrono::seconds(1 + num_iterations / 500)); std::lock_guard lock(main_mutex); std::cout << BytesToStr(ConvertIntegerToBytes(integer(num_iterations), 8)); - std::cout << result.hex() << "\n" << std::flush; + std::cout << BytesToStr(SerializeForm(weso, x, 129)); + //std::cout << result.hex() << "\n" << std::flush; + std::cout << 
"0020d326c63c7f1782ce7abae04f2464357d5d7b4e3788ef34e44896929c6ad7173ed8c9ea4f5c6c1b6ee20cfbb774e6373cda8d2278bed0781867208b993baa9d0011cf7e89a5f519d34c548aafd63dc5f15a472fede0c1e7b1a7ecf6bf323de61bd8e684b88323d9a7567d698d80b9ff3c148eb1a1ca335d4d4c4fe1c7ba2a914b000000000000012c002052df9df1f29eed204ea18ab1dad68d5ee66784c0568f90a08856223b89101532c443b895b7e050f55c6d6d1a998068f3f9891b1e6e0a81870be653523a4c2cffe860aa2dab86a08fa78c9e949167a1a7b81a2734af3493fe39547de776a0206d02b006430551cfbff9567b0a1bd232837510d32af8173b96c6454ad7b1438069005ef7b973223abca1ed93348a1a0e84d64693d800cac6066ac1bc3e0441100691d9272070842ddcc35ec0545b817982e3e6c9677a047660f19620b2685204214200376489a14ce7ee5b2c528d9fb74cc8ee9d9427376c3acec3d02a854f52313cfbcf77c6d4e50b48be4d38ff68e5abdac7016a3616e061253f29d545a0e30dcb85000000000000012c00191f9a4148916b97cd0feffb58fb29aa30bad06f88c4709b2446334dbe5a1f150bf10563fa481e72f5e2285237835e20d47ac7f14702ca3ab594847978f36ecdfff83ede73376b636a60ecda5968577df2bdec43b5fbee001b61a0d497f07d093a87e1142a2ddd1bd8713e5c8425b2e6de648be532ba1ee766a8934792b5ccb3dd003b92b42beb4bb3ddd0b6371ece5c71682194be20bf1c3b27ad271de4eca9ceaba2632ddb000ba13a0bd1064066c104f70e1480f87c29e245340dd3a0dbf8b4d40005b5266665a0ebe98df87af2132a4a30e5bbb576cff3febf815ecc9870f671f7b00c2963f504901801affc8b97aead35fba69c324cd4142310705741f347ebb1" << "\n" << std::flush; } int main(int argc, char* argv[]) { @@ -692,7 +695,7 @@ int main(int argc, char* argv[]) { weso.L = L; weso.kl = 10; - std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stop_signal)); + //std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stop_signal)); while(!stop_signal) { std::this_thread::sleep_for (std::chrono::seconds(2)); @@ -706,7 +709,7 @@ int main(int argc, char* argv[]) { threads[t].join(); } stop_signal = true; - vdf_worker.join(); + //vdf_worker.join(); std::lock_guard lock(main_mutex); for (int i = 0; i < 100; i++) std::cout << "0"; diff --git a/src/timelord.py 
b/src/timelord.py index da9ad55c..d6ef72bb 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -21,7 +21,7 @@ from src.server.outbound_message import OutboundMessage, Delivery, Message, Node class Database: lock: Lock = Lock() challenges: Dict = {} - + finished_challenges = [] config = yaml.safe_load(open("src/config/timelord.yaml", "r")) log = logging.getLogger(__name__) @@ -34,7 +34,7 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): should be started on it. We can generate a classgroup (discriminant), and start a new VDF process here. But we don't know how many iterations to run for, so we run forever. - """ + """ async with db.lock: assert(challenge_start.challenge_hash not in db.challenges) disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) @@ -45,10 +45,9 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): command, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE) - #stderr=asyncio.subprocess.PIPE) db.challenges[challenge_start.challenge_hash] = (disc, proc) - + while True: output = await proc.stdout.readline() @@ -58,17 +57,17 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): async with db.lock: del db.challenges[challenge_start.challenge_hash] log.info(f"The process for challenge {challenge_start.challenge_hash} ended") + return stdout_bytes_io: io.BytesIO = io.BytesIO(bytes.fromhex(output[:-1].decode())) iterations_needed = int.from_bytes(stdout_bytes_io.read(8), "big", signed=True) - y = ClassgroupElement.parse(stdout_bytes_io) proof_bytes: bytes = stdout_bytes_io.read() # Verifies our own proof just in case proof_blob = ClassGroup.from_ab_discriminant(y.a, y.b, disc).serialize() + proof_bytes x = ClassGroup.from_ab_discriminant(2, 1, disc) - assert check_proof_of_time_nwesolowski(disc, x, proof_blob, iterations_needed, 1024, 2) + #assert check_proof_of_time_nwesolowski(disc, x, proof_blob, 
iterations_needed, 1024, 2) output = ProofOfTimeOutput(challenge_start.challenge_hash, iterations_needed, @@ -86,11 +85,14 @@ async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): exists. """ async with db.lock: - if challenge_end.challenge_hash in db.challenges: + if challenge_end.challenge_hash not in db.finished_challenges: _, proc = db.challenges[challenge_end.challenge_hash] #I'm no longer accepting new challenges, process will finish everything else smoothly. proc.stdin.write(b'0\n') await proc.stdin.drain() + db.finished_challenges.append(challenge_end.challenge_hash) + else: + log.info("Trying to close the challenge multiple times..") @api_request async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpaceInfo): @@ -103,6 +105,7 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac if proof_of_space_info.challenge_hash not in db.challenges: log.warn(f"Have not seen challenge {proof_of_space_info.challenge_hash} yet.") return + assert(proof_of_space_info.challenge_hash not in db.finished_challenges) _, proc = db.challenges[proof_of_space_info.challenge_hash] proc.stdin.write((str(proof_of_space_info.iterations_needed) + "\n").encode()) - await proc.stdin.drain() + await proc.stdin.drain() \ No newline at end of file diff --git a/src/types/proof_of_time.py b/src/types/proof_of_time.py index 861d31ed..5c9ece82 100644 --- a/src/types/proof_of_time.py +++ b/src/types/proof_of_time.py @@ -28,6 +28,7 @@ class ProofOfTime: x = ClassGroup.from_ab_discriminant(2, 1, disc) y = ClassGroup.from_ab_discriminant(self.output.output.a, self.output.output.b, disc) + return True return check_proof_of_time_nwesolowski(disc, x, y.serialize() + bytes(self.witness), self.output.number_of_iterations, constants.DISCRIMINANT_SIZE_BITS, From d56c028494599627de980014839054740c4c7e79 Mon Sep 17 00:00:00 2001 From: fchirica Date: Fri, 4 Oct 2019 17:34:27 +0300 Subject: [PATCH 03/13] New socket protocol --- 
lib/chiavdf/fast_vdf/vdf.cpp | 256 ++++++++++++++++++++++++----------- src/server/start_timelord.py | 1 - src/timelord.py | 148 ++++++++++++-------- src/types/proof_of_time.py | 1 - 4 files changed, 270 insertions(+), 136 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 1ca79149..e61d0b46 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -41,8 +41,13 @@ #include "ClassGroup.h" #include "Reducer.h" +#include + bool warn_on_corruption_in_production=false; +using boost::asio::ip::tcp; + + struct akashnil_form { // y = ax^2 + bxy + y^2 mpz_t a; @@ -227,6 +232,11 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb while (!stopped) { uint64 c_checkpoint_interval=checkpoint_interval; + + if (weso.iterations >= 500000) { + std::cout << "Stopping weso at 500000 iterations!\n"; + return ; + } #ifdef VDF_TEST form f_copy; @@ -438,7 +448,7 @@ struct Proof { #define PULMARK 1 -form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, uint64_t num_iterations, uint64_t k, uint64_t l, WesolowskiCallback& weso) { +form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, uint64_t num_iterations, uint64_t k, uint64_t l, WesolowskiCallback& weso, bool& stop_signal) { auto t1 = std::chrono::high_resolution_clock::now(); #if PULMARK @@ -465,7 +475,7 @@ form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, ys[i] = form::identity(D); form *tmp; - for (uint64_t i = 0; i < ceil(1.0 * num_iterations / (k * l)); i++) { + for (uint64_t i = 0; !stop_signal && i < ceil(1.0 * num_iterations / (k * l)); i++) { if (num_iterations >= k * (i * l + j + 1)) { uint64_t b = GetBlock(i*l + j, k, num_iterations, B); tmp = weso.GetForm(done_iterations + i * k * l); @@ -487,9 +497,12 @@ form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, } } - for (uint64_t b1 = 0; b1 < (1 << k1); b1++) { + if (stop_signal) 
+ return form(); + + for (uint64_t b1 = 0; b1 < (1 << k1) && !stop_signal; b1++) { form z = form::identity(D); - for (uint64_t b0 = 0; b0 < (1 << k0); b0++) { + for (uint64_t b0 = 0; b0 < (1 << k0) && !stop_signal; b0++) { nucomp_form(z, z, ys[b1 * (1 << k0) + b0], D, L); #if PULMARK // Pulmark reduce based on Akashnil reduce @@ -510,9 +523,9 @@ form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, x = x * z; } - for (uint64_t b0 = 0; b0 < (1 << k0); b0++) { + for (uint64_t b0 = 0; b0 < (1 << k0) && !stop_signal; b0++) { form z = form::identity(D); - for (uint64_t b1 = 0; b1 < (1 << k1); b1++) { + for (uint64_t b1 = 0; b1 < (1 << k1) && !stop_signal; b1++) { nucomp_form(z, z, ys[b1 * (1 << k0) + b0], D, L); #if PULMARK // Pulmark reduce based on Akashnil reduce @@ -532,6 +545,9 @@ form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, z = FastPowForm(z, D, b0); x = x * z; } + + if (stop_signal) + return form(); } #if PULMARK @@ -558,12 +574,12 @@ form GenerateProof(form &y, form &x_init, integer &D, uint64_t done_iterations, } void GenerateProofThreaded(std::promise && form_promise, form y, form x_init, integer D, uint64_t done_iterations, uint64_t num_iterations, uint64_t -k, uint64_t l, WesolowskiCallback& weso) { - form proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso); +k, uint64_t l, WesolowskiCallback& weso, bool& stop_signal) { + form proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso, stop_signal); form_promise.set_value(proof); } -Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, uint64_t done_iterations, WesolowskiCallback& weso) { +Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, uint64_t done_iterations, WesolowskiCallback& weso, bool& stop_signal) { uint64_t l, k, w; form x_init = x; integer L=root(-D, 4); @@ -572,12 +588,18 @@ Proof CreateProofOfTimeWesolowski(integer& D, form x, 
int64_t num_iterations, ui w = 2; l = (num_iterations >= 10000000) ? 10 : 1; - while (weso.iterations < done_iterations + num_iterations) { + while (!stop_signal && weso.iterations < done_iterations + num_iterations) { std::this_thread::sleep_for (std::chrono::seconds(3)); } + + if (stop_signal) + return Proof(); form y = weso.GetFormFromCheckpoint(done_iterations + num_iterations); - auto proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso); + auto proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso, stop_signal); + + if (stop_signal) + return Proof(); int int_size = (D.num_bits() + 16) >> 4; @@ -589,7 +611,7 @@ Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, ui } Proof CreateProofOfTimeNWesolowski(integer& D, form x, int64_t num_iterations, - uint64_t done_iterations, WesolowskiCallback& weso, int depth_limit, int depth) { + uint64_t done_iterations, WesolowskiCallback& weso, int depth_limit, int depth, bool& stop_signal) { uint64_t l, k, w; int64_t iterations1, iterations2; integer L=root(-D, 4); @@ -609,26 +631,30 @@ Proof CreateProofOfTimeNWesolowski(integer& D, form x, int64_t num_iterations, iterations1 = iterations1 - iterations1 % 100; iterations2 = num_iterations - iterations1; - while (weso.iterations < done_iterations + iterations1) { + while (!stop_signal && weso.iterations < done_iterations + iterations1) { std::this_thread::sleep_for (std::chrono::seconds(3)); } - + + if (stop_signal) + return Proof(); form y1 = *weso.GetForm(done_iterations + iterations1); std::promise form_promise; auto form_future = form_promise.get_future(); - std::thread t(&GenerateProofThreaded, std::move(form_promise), y1, x_init, D, done_iterations, iterations1, k, l, std::ref(weso)); + std::thread t(&GenerateProofThreaded, std::move(form_promise), y1, x_init, D, done_iterations, iterations1, k, l, std::ref(weso), std::ref(stop_signal)); Proof proof2; if (depth < depth_limit - 1) { - proof2 
= CreateProofOfTimeNWesolowski(D, y1, iterations2, done_iterations + iterations1, weso, depth_limit, depth + 1); + proof2 = CreateProofOfTimeNWesolowski(D, y1, iterations2, done_iterations + iterations1, weso, depth_limit, depth + 1, stop_signal); } else { - proof2 = CreateProofOfTimeWesolowski(D, y1, iterations2, done_iterations + iterations1, weso); + proof2 = CreateProofOfTimeWesolowski(D, y1, iterations2, done_iterations + iterations1, weso, stop_signal); } t.join(); + if (stop_signal) + return Proof(); form proof = form_future.get(); int int_size = (D.num_bits() + 16) >> 4; @@ -647,78 +673,154 @@ Proof CreateProofOfTimeNWesolowski(integer& D, form x, int64_t num_iterations, return final_proof; } -std::mutex main_mutex; +std::mutex socket_mutex; -void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallback& weso) { - //Proof result = CreateProofOfTimeNWesolowski(D, x, num_iterations, 0, weso, 2, 0); - std::this_thread::sleep_for (std::chrono::seconds(1 + num_iterations / 500)); - std::lock_guard lock(main_mutex); - std::cout << BytesToStr(ConvertIntegerToBytes(integer(num_iterations), 8)); - std::cout << BytesToStr(SerializeForm(weso, x, 129)); - //std::cout << result.hex() << "\n" << std::flush; - std::cout << 
"0020d326c63c7f1782ce7abae04f2464357d5d7b4e3788ef34e44896929c6ad7173ed8c9ea4f5c6c1b6ee20cfbb774e6373cda8d2278bed0781867208b993baa9d0011cf7e89a5f519d34c548aafd63dc5f15a472fede0c1e7b1a7ecf6bf323de61bd8e684b88323d9a7567d698d80b9ff3c148eb1a1ca335d4d4c4fe1c7ba2a914b000000000000012c002052df9df1f29eed204ea18ab1dad68d5ee66784c0568f90a08856223b89101532c443b895b7e050f55c6d6d1a998068f3f9891b1e6e0a81870be653523a4c2cffe860aa2dab86a08fa78c9e949167a1a7b81a2734af3493fe39547de776a0206d02b006430551cfbff9567b0a1bd232837510d32af8173b96c6454ad7b1438069005ef7b973223abca1ed93348a1a0e84d64693d800cac6066ac1bc3e0441100691d9272070842ddcc35ec0545b817982e3e6c9677a047660f19620b2685204214200376489a14ce7ee5b2c528d9fb74cc8ee9d9427376c3acec3d02a854f52313cfbcf77c6d4e50b48be4d38ff68e5abdac7016a3616e061253f29d545a0e30dcb85000000000000012c00191f9a4148916b97cd0feffb58fb29aa30bad06f88c4709b2446334dbe5a1f150bf10563fa481e72f5e2285237835e20d47ac7f14702ca3ab594847978f36ecdfff83ede73376b636a60ecda5968577df2bdec43b5fbee001b61a0d497f07d093a87e1142a2ddd1bd8713e5c8425b2e6de648be532ba1ee766a8934792b5ccb3dd003b92b42beb4bb3ddd0b6371ece5c71682194be20bf1c3b27ad271de4eca9ceaba2632ddb000ba13a0bd1064066c104f70e1480f87c29e245340dd3a0dbf8b4d40005b5266665a0ebe98df87af2132a4a30e5bbb576cff3febf815ecc9870f671f7b00c2963f504901801affc8b97aead35fba69c324cd4142310705741f347ebb1" << "\n" << std::flush; +void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallback& weso, bool& stop_signal, tcp::socket& sock) { + Proof result = CreateProofOfTimeNWesolowski(D, x, num_iterations, 0, weso, 2, 0, stop_signal); + if (stop_signal == true) { + std::cout << "Got stop signal before completing the proof!\n"; + return ; + } + std::vector bytes = ConvertIntegerToBytes(integer(num_iterations), 8); + bytes.insert(bytes.end(), result.y.begin(), result.y.end()); + bytes.insert(bytes.end(), result.proof.begin(), result.proof.end()); + std::string str_result = BytesToStr(bytes); + std::cout << "Generated proof = " << str_result 
<< "\n"; + std::lock_guard lock(socket_mutex); + boost::asio::write(sock, boost::asio::buffer(str_result.c_str(), str_result.size())); } -int main(int argc, char* argv[]) { - if (getenv( "warn_on_corruption_in_production" )!=nullptr) { - warn_on_corruption_in_production=true; - } - if (is_vdf_test) { - print( "=== Test mode ===" ); - } - if (warn_on_corruption_in_production) { - print( "=== Warn on corruption enabled ===" ); - } - assert(is_vdf_test); //assertions should be disabled in VDF_MODE==0 - init_gmp(); - allow_integer_constructor=true; //make sure the old gmp allocator isn't used - set_rounding_mode(); - vdf_original::init(); +const int max_length = 2048; - integer D(argv[1]); - integer L=root(-D, 4); - form f=form::generator(D); +void session(tcp::socket sock) { + try { + char disc[350]; + char disc_size[5]; + boost::system::error_code error; - bool stop_signal = false; - uint64_t num_iterations; - std::set seen_iterations; + boost::asio::read(sock, boost::asio::buffer(disc_size, 3), error); + int disc_int_size = atoi(disc_size); - std::vector threads; - WesolowskiCallback weso(100000000); - - mpz_init(weso.forms[0].a.impl); - mpz_init(weso.forms[0].b.impl); - mpz_init(weso.forms[0].c.impl); - - weso.forms[0]=f; - weso.D = D; - weso.L = L; - weso.kl = 10; + boost::asio::read(sock, boost::asio::buffer(disc, disc_int_size), error); - //std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stop_signal)); + integer D(disc); - while(!stop_signal) { - std::this_thread::sleep_for (std::chrono::seconds(2)); + std::cout << "Discriminant = " << D.impl << "\n"; - cin >> num_iterations; - if (seen_iterations.size() > 0 && num_iterations >= *seen_iterations.begin()) - continue; + // Init VDF the discriminant... - if (num_iterations == 0) { - for (int t = 0; t < threads.size(); t++) { - threads[t].join(); + if (error == boost::asio::error::eof) + return ; // Connection closed cleanly by peer. 
+ else if (error) + throw boost::system::system_error(error); // Some other error. + + if (getenv( "warn_on_corruption_in_production" )!=nullptr) { + warn_on_corruption_in_production=true; + } + if (is_vdf_test) { + print( "=== Test mode ===" ); + } + if (warn_on_corruption_in_production) { + print( "=== Warn on corruption enabled ===" ); + } + assert(is_vdf_test); //assertions should be disabled in VDF_MODE==0 + init_gmp(); + allow_integer_constructor=true; //make sure the old gmp allocator isn't used + set_rounding_mode(); + vdf_original::init(); + + integer L=root(-D, 4); + form f=form::generator(D); + + bool stop_signal = false; + std::set seen_iterations; + + std::vector threads; + WesolowskiCallback weso(1000000); + + mpz_init(weso.forms[0].a.impl); + mpz_init(weso.forms[0].b.impl); + mpz_init(weso.forms[0].c.impl); + + weso.forms[0]=f; + weso.D = D; + weso.L = L; + weso.kl = 10; + + bool stopped = false; + std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stopped)); + + // Tell client that I'm ready to get the challenges. + boost::asio::write(sock, boost::asio::buffer("OK", 2)); + char data[10]; + + while (!stopped) { + memset(data, 0, sizeof(data)); + boost::asio::read(sock, boost::asio::buffer(data, 1), error); + int size = data[0] - '0'; + boost::asio::read(sock, boost::asio::buffer(data, size), error); + int iters = atoi(data); + std::cout << "Got iterations " << iters << "\n"; + if (seen_iterations.size() > 0 && iters != 0) { + std::cout << "Ignoring..." 
<< iters << "\n"; + continue; } - stop_signal = true; - //vdf_worker.join(); - std::lock_guard lock(main_mutex); - for (int i = 0; i < 100; i++) - std::cout << "0"; - std::cout << "\n" << std::flush; - } else { - if (seen_iterations.find(num_iterations) == seen_iterations.end()) { - seen_iterations.insert(num_iterations); - threads.push_back(std::thread(NWesolowskiMain, D, f, num_iterations, std::ref(weso))); + + if (iters == 0) { + stopped = true; + for (int t = 0; t < threads.size(); t++) { + threads[t].join(); + } + vdf_worker.join(); + } else { + if (seen_iterations.find(iters) == seen_iterations.end()) { + seen_iterations.insert(iters); + threads.push_back(std::thread(NWesolowskiMain, D, f, iters, std::ref(weso), std::ref(stopped), + std::ref(sock))); + } } } + // Tell client I've stopped everything, wait for ACK and close. + std::lock_guard lock(socket_mutex); + boost::asio::write(sock, boost::asio::buffer("STOP", 4)); + std::cout << "Stopped everything! Ready for the next challenge.\n"; + + char ack[5]; + boost::asio::read(sock, boost::asio::buffer(ack, 3), error); + assert (strncmp(ack, "ACK", 3) == 0); + } catch (std::exception& e) { + std::cerr << "Exception in thread: " << e.what() << "\n"; } +} + +void server(boost::asio::io_context& io_context, unsigned short port) +{ + tcp::acceptor a(io_context, tcp::endpoint(tcp::v4(), port)); + for (;;) + { + std::thread t(session, a.accept()); + t.join(); + } +} + +int main(int argc, char* argv[]) +{ + try + { + if (argc != 2) + { + std::cerr << "Usage: blocking_tcp_echo_server \n"; + return 1; + } + + boost::asio::io_context io_context; + + server(io_context, std::atoi(argv[1])); + } + catch (std::exception& e) + { + std::cerr << "Exception: " << e.what() << "\n"; + } + + return 0; } \ No newline at end of file diff --git a/src/server/start_timelord.py b/src/server/start_timelord.py index 04ae209b..4158648b 100644 --- a/src/server/start_timelord.py +++ b/src/server/start_timelord.py @@ -7,7 +7,6 @@ from src 
import timelord logging.basicConfig(format='Timelord %(name)-23s: %(levelname)-8s %(message)s', level=logging.INFO) - async def main(): host, port = parse_host_port(timelord) server, _ = await start_chia_server(host, port, timelord, NodeType.FULL_NODE) diff --git a/src/timelord.py b/src/timelord.py index d6ef72bb..7dc20206 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -2,6 +2,7 @@ import logging import asyncio import time import io +import sys import yaml from asyncio import Lock from typing import Dict @@ -17,15 +18,16 @@ from src.util.ints import uint8 from src.consensus import constants from src.server.outbound_message import OutboundMessage, Delivery, Message, NodeType - class Database: lock: Lock = Lock() - challenges: Dict = {} - finished_challenges = [] + free_servers = [] + active_discriminants: Dict = {} + done_discriminants = [] -config = yaml.safe_load(open("src/config/timelord.yaml", "r")) log = logging.getLogger(__name__) +config = yaml.safe_load(open("src/config/timelord.yaml", "r")) db = Database() +db.free_servers.append(8889) @api_request async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): @@ -34,50 +36,77 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): should be started on it. We can generate a classgroup (discriminant), and start a new VDF process here. But we don't know how many iterations to run for, so we run forever. - """ + """ + + disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) + + #Wait for a server to become free. + port = None + while (port is None): + async with db.lock: + if (len(db.free_servers) != 0): + port = db.free_servers[0] + db.free_servers = db.free_servers[1:] + log.info(f"Discriminant {disc} attached to port {port}.") + #Poll until a server becomes free. 
+ if (port is None): + await asyncio.sleep(3) + + #TODO(Florin): Handle connection failure (attempt another server) + try: + reader, writer = await asyncio.open_connection('127.0.0.1', port) + except Exception as e: + e_to_str = str(e) + log.error(f"Connection to VDF server error message: {e_to_str}") + + writer.write((str(len(str(disc))) + str(disc)).encode()) + await writer.drain() + + ok = await reader.readexactly(2) + assert(ok.decode() == "OK") + + log.info("Got handshake with VDF server.") + async with db.lock: - assert(challenge_start.challenge_hash not in db.challenges) - disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) - command = (f"./lib/chiavdf/fast_vdf/vdf {disc}") - log.info(f"Executing VDF process for discriminant: {disc}") - - proc = await asyncio.create_subprocess_shell( - command, - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE) + db.active_discriminants[challenge_start.challenge_hash] = writer - db.challenges[challenge_start.challenge_hash] = (disc, proc) - - while True: - output = await proc.stdout.readline() - - # Signal that process finished all challenges. - if (output.decode() == "0"*100 + "\n"): - await proc.wait() + #Listen to the server until "STOP" is received. + while(True): + data = await reader.readexactly(4) + if (data.decode() == "STOP"): + #Server is now available. async with db.lock: - del db.challenges[challenge_start.challenge_hash] - log.info(f"The process for challenge {challenge_start.challenge_hash} ended") - return + writer.write(b"ACK") + await writer.drain() + db.free_servers.append(port) + break + else: + try: + #This must be a proof, read the continuation. 
+ proof = await reader.readexactly(1860) + stdout_bytes_io: io.BytesIO = io.BytesIO(bytes.fromhex(data.decode() + proof.decode())) + iterations_needed = int.from_bytes(stdout_bytes_io.read(8), "big", signed=True) + y = ClassgroupElement.parse(stdout_bytes_io) + proof_bytes: bytes = stdout_bytes_io.read() - stdout_bytes_io: io.BytesIO = io.BytesIO(bytes.fromhex(output[:-1].decode())) - iterations_needed = int.from_bytes(stdout_bytes_io.read(8), "big", signed=True) - y = ClassgroupElement.parse(stdout_bytes_io) - proof_bytes: bytes = stdout_bytes_io.read() + # Verifies our own proof just in case + proof_blob = ClassGroup.from_ab_discriminant(y.a, y.b, disc).serialize() + proof_bytes + x = ClassGroup.from_ab_discriminant(2, 1, disc) + assert check_proof_of_time_nwesolowski(disc, x, proof_blob, iterations_needed, 1024, 2) - # Verifies our own proof just in case - proof_blob = ClassGroup.from_ab_discriminant(y.a, y.b, disc).serialize() + proof_bytes - x = ClassGroup.from_ab_discriminant(2, 1, disc) - #assert check_proof_of_time_nwesolowski(disc, x, proof_blob, iterations_needed, 1024, 2) + output = ProofOfTimeOutput(challenge_start.challenge_hash, + iterations_needed, + ClassgroupElement(y.a, y.b)) + proof_of_time = ProofOfTime(output, config['n_wesolowski'], [uint8(b) for b in proof_bytes]) + response = timelord_protocol.ProofOfTimeFinished(proof_of_time) - output = ProofOfTimeOutput(challenge_start.challenge_hash, - iterations_needed, - ClassgroupElement(y.a, y.b)) - proof_of_time = ProofOfTime(output, config['n_wesolowski'], [uint8(b) for b in proof_bytes]) - response = timelord_protocol.ProofOfTimeFinished(proof_of_time) - - log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") - yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) + log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") + yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), 
Delivery.RESPOND) + except Exception as e: + e_to_str = str(e) + log.error(f"Socket error: {e_to_str}") + @api_request async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): """ @@ -85,14 +114,14 @@ async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): exists. """ async with db.lock: - if challenge_end.challenge_hash not in db.finished_challenges: - _, proc = db.challenges[challenge_end.challenge_hash] - #I'm no longer accepting new challenges, process will finish everything else smoothly. - proc.stdin.write(b'0\n') - await proc.stdin.drain() - db.finished_challenges.append(challenge_end.challenge_hash) - else: - log.info("Trying to close the challenge multiple times..") + if (challenge_end.challenge_hash in db.done_discriminants): + return + if (challenge_end.challenge_hash in db.active_discriminants): + writer = db.active_discriminants[challenge_end.challenge_hash] + writer.write(b'10') + await writer.drain() + del db.active_discriminants[challenge_end.challenge_hash] + db.done_discriminants.append(challenge_end.challenge_hash) @api_request async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpaceInfo): @@ -101,11 +130,16 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac have a process for this challenge, we should communicate to the process to tell it how many iterations to run for. TODO: process should be started in challenge_start instead. 
""" - async with db.lock: - if proof_of_space_info.challenge_hash not in db.challenges: - log.warn(f"Have not seen challenge {proof_of_space_info.challenge_hash} yet.") - return - assert(proof_of_space_info.challenge_hash not in db.finished_challenges) - _, proc = db.challenges[proof_of_space_info.challenge_hash] - proc.stdin.write((str(proof_of_space_info.iterations_needed) + "\n").encode()) - await proc.stdin.drain() \ No newline at end of file + + while (True): + async with db.lock: + if (proof_of_space_info.challenge_hash in db.active_discriminants): + writer = db.active_discriminants[proof_of_space_info.challenge_hash] + writer.write((str(len(str(proof_of_space_info.iterations_needed))) + + str(proof_of_space_info.iterations_needed)).encode()) + await writer.drain() + return + if (proof_of_space_info.challenge_hash in db.done_discriminants): + log.info("Got iters for a finished challenge") + return + await asyncio.sleep(3) diff --git a/src/types/proof_of_time.py b/src/types/proof_of_time.py index 5c9ece82..861d31ed 100644 --- a/src/types/proof_of_time.py +++ b/src/types/proof_of_time.py @@ -28,7 +28,6 @@ class ProofOfTime: x = ClassGroup.from_ab_discriminant(2, 1, disc) y = ClassGroup.from_ab_discriminant(self.output.output.a, self.output.output.b, disc) - return True return check_proof_of_time_nwesolowski(disc, x, y.serialize() + bytes(self.witness), self.output.number_of_iterations, constants.DISCRIMINANT_SIZE_BITS, From e95ac87647c47165b365120a674e88dc42d31513 Mon Sep 17 00:00:00 2001 From: Bill Blanke Date: Fri, 4 Oct 2019 11:43:00 -0700 Subject: [PATCH 04/13] uninitialized var and null terminated strings --- lib/chiavdf/fast_vdf/vdf.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index e61d0b46..b4f442ab 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -93,7 +93,7 @@ public: bool deferred; int64_t switch_iters = -1; int64_t switch_index; 
- int64_t iterations; + int64_t iterations = 0; // This must be intialized to zero at start integer D; integer L; @@ -698,6 +698,9 @@ void session(tcp::socket sock) { char disc_size[5]; boost::system::error_code error; + memset(disc,0x00,sizeof(disc)); // For null termination + memset(disc_size,0x00,sizeof(disc_size)); // For null termination + boost::asio::read(sock, boost::asio::buffer(disc_size, 3), error); int disc_int_size = atoi(disc_size); @@ -823,4 +826,4 @@ int main(int argc, char* argv[]) } return 0; -} \ No newline at end of file +} From a66fe8c18495d14d104ca8ea8df95724b236efa3 Mon Sep 17 00:00:00 2001 From: Bill Blanke Date: Fri, 4 Oct 2019 13:12:14 -0700 Subject: [PATCH 05/13] made vdf_original thread safe since it is called by squaring thread in addition to proof threads --- lib/chiavdf/fast_vdf/vdf.cpp | 33 ++++++++++++++--------------- lib/chiavdf/fast_vdf/vdf_original.h | 31 +++++++++++++-------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index b4f442ab..694b814d 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -59,17 +59,16 @@ struct akashnil_form { const int64_t THRESH = 1UL<<31; const int64_t EXP_THRESH = 31; - //always works -void repeated_square_original(form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { +void repeated_square_original(vdf_original &vdfo, form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { vdf_original::form f_in,*f_res; f_in.a[0]=f.a.impl[0]; f_in.b[0]=f.b.impl[0]; f_in.c[0]=f.c.impl[0]; f_res=&f_in; - + for (uint64_t i=0; i < iterations; i++) { - f_res = vdf_original::square(*f_res); + f_res = vdfo.square(*f_res); if(nuduplListener!=NULL) nuduplListener->OnIteration(NL_FORM,f_res,base+i); @@ -78,11 +77,8 @@ void repeated_square_original(form& f, const integer& D, const integer& L, uint6 
mpz_set(f.a.impl, f_res->a); mpz_set(f.b.impl, f_res->b); mpz_set(f.c.impl, f_res->c); - - //vdf_original::form f_res=vdf_original::repeated_square(&f_in, base, iterations); } - class WesolowskiCallback :public INUDUPLListener { public: uint64_t kl; @@ -100,6 +96,8 @@ public: ClassGroupContext *t; Reducer *reducer; + + vdf_original vdfo; WesolowskiCallback(uint64_t expected_space) { forms = (form*) malloc(sizeof(struct form) * expected_space); @@ -150,15 +148,15 @@ public: form *GetForm(int power) { return &(forms[GetPosition(power)]); } - - form GetFormFromCheckpoint(int power) { + + form GetFormFromCheckpoint(vdf_original &vdfo, int power) { uint64 checkpoint = power - power % 100; form checkpoint_form; mpz_init(checkpoint_form.a.impl); mpz_init(checkpoint_form.b.impl); mpz_init(checkpoint_form.c.impl); checkpoint_form = forms[GetPosition(checkpoint)]; - repeated_square_original(checkpoint_form, D, L, 0, power % 100, NULL); + repeated_square_original(vdfo, checkpoint_form, D, L, 0, power % 100, NULL); return checkpoint_form; } @@ -255,7 +253,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb #ifdef ENABLE_TRACK_CYCLES print( "track cycles enabled; results will be wrong" ); - repeated_square_original(f, D, L, 100); //randomize the a and b values + repeated_square_original(weso.vdfo, f, D, L, 100); //randomize the a and b values #endif // This works single threaded @@ -276,7 +274,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb if (actual_iterations==~uint64(0)) { //corruption; f is unchanged. 
do the entire batch with the slow algorithm - repeated_square_original(f, D, L, num_iterations, batch_size, &weso); + repeated_square_original(weso.vdfo, f, D, L, num_iterations, batch_size, &weso); actual_iterations=batch_size; #ifdef VDF_TEST @@ -292,7 +290,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb //the fast algorithm terminated prematurely for whatever reason. f is still valid //it might terminate prematurely again (e.g. gcd quotient too large), so will do one iteration of the slow algorithm //this will also reduce f if the fast algorithm terminated because it was too big - repeated_square_original(f, D, L, num_iterations+actual_iterations, 1, &weso); + repeated_square_original(weso.vdfo, f, D, L, num_iterations+actual_iterations, 1, &weso); #ifdef VDF_TEST ++num_iterations_slow; @@ -312,7 +310,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb form f_copy_2=f; weso.reduce(f_copy_2); - repeated_square_original(f_copy, D, L, actual_iterations); + repeated_square_original(weso.vdfo, f_copy, D, L, actual_iterations); assert(f_copy==f_copy_2); } #endif @@ -595,7 +593,9 @@ Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, ui if (stop_signal) return Proof(); - form y = weso.GetFormFromCheckpoint(done_iterations + num_iterations); + vdf_original vdfo; + + form y = weso.GetFormFromCheckpoint(vdfo, done_iterations + num_iterations); auto proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso, stop_signal); if (stop_signal) @@ -730,7 +730,6 @@ void session(tcp::socket sock) { init_gmp(); allow_integer_constructor=true; //make sure the old gmp allocator isn't used set_rounding_mode(); - vdf_original::init(); integer L=root(-D, 4); form f=form::generator(D); @@ -764,7 +763,7 @@ void session(tcp::socket sock) { boost::asio::read(sock, boost::asio::buffer(data, size), error); int iters = atoi(data); std::cout << "Got iterations " << iters << 
"\n"; - if (seen_iterations.size() > 0 && iters != 0) { + if (seen_iterations.size() > 3 && iters != 0) { std::cout << "Ignoring..." << iters << "\n"; continue; } diff --git a/lib/chiavdf/fast_vdf/vdf_original.h b/lib/chiavdf/fast_vdf/vdf_original.h index 3d6c9ddc..8e06a1fb 100644 --- a/lib/chiavdf/fast_vdf/vdf_original.h +++ b/lib/chiavdf/fast_vdf/vdf_original.h @@ -14,7 +14,10 @@ See the License for the specific language governing permissions and limitations under the License. ***/ -namespace vdf_original { +class vdf_original +{ +public: + struct form { // y = ax^2 + bxy + y^2 mpz_t a; @@ -24,15 +27,11 @@ namespace vdf_original { //mpz_t d; // discriminant }; - ostream& operator<<(ostream& os, const form& f) { - return os << "a: " << f.a << endl << "b: " << f.b << endl << "c: " << f.c << endl; - } - mpz_t negative_a, r, denom, old_b, ra, s, x, old_a, g, d, e, q, w, u, a, b, m, k, mu, v, sigma, lambda, h, t, l, j; form f3; - inline void normalize(form& f) { + void normalize(form& f) { mpz_neg(negative_a, f.a); if (mpz_cmp(f.b, negative_a) > 0 && mpz_cmp(f.b, f.a) <= 0) { // Already normalized @@ -65,7 +64,7 @@ namespace vdf_original { mpz_add(f.c, f.c, ra); } - inline void reduce(form& f) { + void reduce(form& f) { normalize(f); while ((mpz_cmp(f.a, f.c) > 0) || (mpz_cmp(f.a, f.c) == 0 && mpz_cmp_si(f.b, 0) < 0)) { @@ -105,7 +104,7 @@ namespace vdf_original { normalize(f); } - inline form generator_for_discriminant(mpz_t* d) { + form generator_for_discriminant(mpz_t* d) { form x; mpz_init_set_ui(x.a, 2); mpz_init_set_ui(x.b, 1); @@ -127,7 +126,7 @@ namespace vdf_original { // Returns mu and v, solving for x: ax = b mod m // such that x = u + vn (n are all integers). Assumes that mu and v are initialized. 
// Returns 0 on success, -1 on failure - inline int solve_linear_congruence(mpz_t& mu, mpz_t& v, mpz_t& a, mpz_t& b, mpz_t& m) { + int solve_linear_congruence(mpz_t& mu, mpz_t& v, mpz_t& a, mpz_t& b, mpz_t& m) { // g = gcd(a, m), and da + em = g mpz_gcdext(g, d, e, a, m); @@ -148,7 +147,7 @@ namespace vdf_original { } // Faster version without check, and without returning v - inline int solve_linear_congruence(mpz_t& mu, mpz_t& a, mpz_t& b, mpz_t& m) { + int solve_linear_congruence(mpz_t& mu, mpz_t& a, mpz_t& b, mpz_t& m) { mpz_gcdext(g, d, e, a, m); mpz_fdiv_q(q, b, g); mpz_mul(mu, q, d); @@ -157,12 +156,12 @@ namespace vdf_original { } // Takes the gcd of three numbers - inline void three_gcd(mpz_t& ret, mpz_t& a, mpz_t& b, mpz_t& c) { + void three_gcd(mpz_t& ret, mpz_t& a, mpz_t& b, mpz_t& c) { mpz_gcd(ret, a, b); mpz_gcd(ret, ret, c); } - inline form* multiply(form &f1, form &f2) { + form* multiply(form &f1, form &f2) { //assert(mpz_cmp(f1.d, f2.d) == 0); // g = (b1 + b2) / 2 @@ -280,7 +279,7 @@ namespace vdf_original { * C = mu^2 - (b * mu - c)/a * 3. 
reduce f(A, B, C) **/ - inline form* square(form &f1) { + form* square(form &f1) { int ret = solve_linear_congruence(mu, f1.b, f1.c, f1.a); assert(ret == 0); @@ -306,16 +305,16 @@ namespace vdf_original { } // Performs the VDF squaring iterations - inline form repeated_square(form *f, uint64_t iterations) { + form repeated_square(form *f, uint64_t iterations) { for (uint64_t i=0; i < iterations; i++) { f = square(*f); } return *f; } - void init() { + vdf_original() { mpz_inits(negative_a, r, denom, old_a, old_b, ra, s, x, g, d, e, q, w, m, u, a, b, k, mu, v, sigma, lambda, f3.a, f3.b, f3.c, //f3.d, NULL); } -} +}; From f8531017c1c84f6be0cd4620f4a480e5fa55a91e Mon Sep 17 00:00:00 2001 From: Bill Blanke Date: Fri, 4 Oct 2019 14:54:15 -0700 Subject: [PATCH 06/13] initialize arrays thought shouldn't matter --- lib/chiavdf/fast_vdf/vdf.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 694b814d..8a558a6e 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -760,6 +760,7 @@ void session(tcp::socket sock) { memset(data, 0, sizeof(data)); boost::asio::read(sock, boost::asio::buffer(data, 1), error); int size = data[0] - '0'; + memset(data, 0, sizeof(data)); boost::asio::read(sock, boost::asio::buffer(data, size), error); int iters = atoi(data); std::cout << "Got iterations " << iters << "\n"; @@ -788,6 +789,7 @@ void session(tcp::socket sock) { std::cout << "Stopped everything! 
Ready for the next challenge.\n"; char ack[5]; + memset(ack,0x00,sizeof(ack)); boost::asio::read(sock, boost::asio::buffer(ack, 3), error); assert (strncmp(ack, "ACK", 3) == 0); } catch (std::exception& e) { From 1de9027c45a52a07af1717567b6860bc634947fd Mon Sep 17 00:00:00 2001 From: fchirica Date: Mon, 7 Oct 2019 18:27:52 +0300 Subject: [PATCH 07/13] Hunt memory leaks --- lib/chiavdf/fast_vdf/nucomp.h | 11 ++++++++++ lib/chiavdf/fast_vdf/vdf.cpp | 33 +++++++++++++++++++---------- lib/chiavdf/fast_vdf/vdf_new.h | 2 +- lib/chiavdf/fast_vdf/vdf_original.h | 6 ++++++ src/timelord.py | 11 ++++++++++ 5 files changed, 51 insertions(+), 12 deletions(-) diff --git a/lib/chiavdf/fast_vdf/nucomp.h b/lib/chiavdf/fast_vdf/nucomp.h index ab0fe5be..a88baec1 100644 --- a/lib/chiavdf/fast_vdf/nucomp.h +++ b/lib/chiavdf/fast_vdf/nucomp.h @@ -188,4 +188,15 @@ void nucomp_form(form &a, form &b, form &c, integer &D, integer &L) { fmpz_get_mpz(a.a.impl,fr.a); fmpz_get_mpz(a.b.impl,fr.b); fmpz_get_mpz(a.c.impl,fr.c); + fmpz_clear(fr.a); + fmpz_clear(fr.b); + fmpz_clear(fr.c); + fmpz_clear(fr2.a); + fmpz_clear(fr2.b); + fmpz_clear(fr2.c); + fmpz_clear(fr3.a); + fmpz_clear(fr3.b); + fmpz_clear(fr3.c); + fmpz_clear(anticD); + fmpz_clear(anticL); } diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 8a558a6e..8f03555c 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -59,6 +59,8 @@ struct akashnil_form { const int64_t THRESH = 1UL<<31; const int64_t EXP_THRESH = 31; +std::vector forms; + //always works void repeated_square_original(vdf_original &vdfo, form& f, const integer& D, const integer& L, uint64 base, uint64 iterations, INUDUPLListener *nuduplListener) { vdf_original::form f_in,*f_res; @@ -83,7 +85,7 @@ class WesolowskiCallback :public INUDUPLListener { public: uint64_t kl; - struct form *forms; + //struct form *forms; form result; bool deferred; @@ -100,14 +102,14 @@ public: vdf_original vdfo; WesolowskiCallback(uint64_t 
expected_space) { - forms = (form*) malloc(sizeof(struct form) * expected_space); + // = (form*) malloc(sizeof(struct form) * expected_space); t=new ClassGroupContext(4096); reducer=new Reducer(*t); } ~WesolowskiCallback() { - free(forms); + //free(forms); delete(reducer); delete(t); @@ -169,7 +171,7 @@ public: { form *mulf=GetForm(iteration); // Initialize since it is raw memory - mpz_inits(mulf->a.impl,mulf->b.impl,mulf->c.impl,NULL); + // mpz_inits(mulf->a.impl,mulf->b.impl,mulf->c.impl,NULL); switch(type) { @@ -740,11 +742,11 @@ void session(tcp::socket sock) { std::vector threads; WesolowskiCallback weso(1000000); - mpz_init(weso.forms[0].a.impl); - mpz_init(weso.forms[0].b.impl); - mpz_init(weso.forms[0].c.impl); + //mpz_init(weso.forms[0].a.impl); + //mpz_init(weso.forms[0].b.impl); + //mpz_init(weso.forms[0].c.impl); - weso.forms[0]=f; + forms[0]=f; weso.D = D; weso.L = L; weso.kl = 10; @@ -764,7 +766,12 @@ void session(tcp::socket sock) { boost::asio::read(sock, boost::asio::buffer(data, size), error); int iters = atoi(data); std::cout << "Got iterations " << iters << "\n"; - if (seen_iterations.size() > 3 && iters != 0) { + if (seen_iterations.size() > 0 && *seen_iterations.begin() <= iters) { + std::cout << "Ignoring..." << iters << "\n"; + continue; + } + + if (seen_iterations.size() > 2 && iters != 0) { std::cout << "Ignoring..." 
<< iters << "\n"; continue; } @@ -808,7 +815,11 @@ void server(boost::asio::io_context& io_context, unsigned short port) } int main(int argc, char* argv[]) -{ +{ + forms.reserve(1000000); + for (int i = 0; i < 1000000; i++) { + mpz_inits(forms[i].a.impl, forms[i].b.impl, forms[i].c.impl, NULL); + } try { if (argc != 2) @@ -827,4 +838,4 @@ int main(int argc, char* argv[]) } return 0; -} +} \ No newline at end of file diff --git a/lib/chiavdf/fast_vdf/vdf_new.h b/lib/chiavdf/fast_vdf/vdf_new.h index 3452e04c..2d13d9db 100644 --- a/lib/chiavdf/fast_vdf/vdf_new.h +++ b/lib/chiavdf/fast_vdf/vdf_new.h @@ -126,7 +126,7 @@ struct form { static form generator(const integer& d) { return from_abd(integer(2), integer(1), d); } - + void reduce() { ::reduce(a, b, c); } diff --git a/lib/chiavdf/fast_vdf/vdf_original.h b/lib/chiavdf/fast_vdf/vdf_original.h index 8e06a1fb..d8cb3a57 100644 --- a/lib/chiavdf/fast_vdf/vdf_original.h +++ b/lib/chiavdf/fast_vdf/vdf_original.h @@ -317,4 +317,10 @@ public: u, a, b, k, mu, v, sigma, lambda, f3.a, f3.b, f3.c, //f3.d, NULL); } + + ~vdf_original() { + /*mpz_clears(negative_a, r, denom, old_a, old_b, ra, s, x, g, d, e, q, w, m, + u, a, b, k, mu, v, sigma, lambda, f3.a, f3.b, f3.c); + */ + } }; diff --git a/src/timelord.py b/src/timelord.py index 7dc20206..89aa4a03 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -21,6 +21,7 @@ from src.server.outbound_message import OutboundMessage, Delivery, Message, Node class Database: lock: Lock = Lock() free_servers = [] + solved_discriminants = [] active_discriminants: Dict = {} done_discriminants = [] @@ -40,6 +41,11 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) + async with db.lock: + if challenge_start.challenge_hash in db.done_discriminants: + log.info("This discriminant was already done..") + return + #Wait for a server to become free. 
port = None while (port is None): @@ -101,6 +107,11 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): response = timelord_protocol.ProofOfTimeFinished(proof_of_time) log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") + async with db.lock: + if (challenge_start.challenge_hash in db.solved_discriminants): + log.info("I've already propagated one proof... Ignoring for now...") + continue + db.solved_discriminants.append(challenge_start.challenge_hash) yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) except Exception as e: e_to_str = str(e) From 52a19e7aafcbdbbae93f97b427fe6f85ee67e825 Mon Sep 17 00:00:00 2001 From: fchirica Date: Mon, 7 Oct 2019 20:02:34 +0300 Subject: [PATCH 08/13] Sync with master --- README.md | 1 + src/__init__.py | 0 src/blockchain.py | 167 +++++++++++++++-------------- src/consensus/__init__.py | 0 src/consensus/block_rewards.py | 3 + src/consensus/constants.py | 43 +++++--- src/consensus/pot_iterations.py | 3 +- src/consensus/weight_verifier.py | 5 + src/farmer.py | 2 +- src/full_node.py | 131 ++++++++++++---------- src/plotter.py | 2 +- src/protocols/__init__.py | 0 src/protocols/farmer_protocol.py | 5 +- src/protocols/peer_protocol.py | 40 ++++--- src/server/__init__.py | 0 src/server/server.py | 2 +- src/server/start_farmer.py | 2 +- src/server/start_plotter.py | 2 +- src/server/start_timelord.py | 2 +- src/simulation/simulate_network.sh | 6 +- src/timelord.py | 4 +- src/types/proof_of_time.py | 7 +- src/util/__init__.py | 0 src/util/api_decorators.py | 4 +- src/util/errors.py | 5 + tests/block_tools.py | 122 ++++++++++++++++----- tests/test_blockchain.py | 158 ++++++++++++++++----------- 27 files changed, 434 insertions(+), 282 deletions(-) create mode 100644 src/__init__.py create mode 100644 src/consensus/__init__.py create mode 100644 src/protocols/__init__.py create mode 100644 src/server/__init__.py create mode 100644 
src/util/__init__.py diff --git a/README.md b/README.md index 6222b57b..9ea8d05e 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ python -m src.server.start_timelord python -m src.server.start_farmer python -m src.server.start_full_node "127.0.0.1" 8002 "-f" "-t" python -m src.server.start_full_node "127.0.0.1" 8004 +python -m src.server.start_full_node "127.0.0.1" 8005 ``` You can also run the simulation, which runs all servers at once. diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/blockchain.py b/src/blockchain.py index 69145985..92185dac 100644 --- a/src/blockchain.py +++ b/src/blockchain.py @@ -10,17 +10,8 @@ from src.util.ints import uint64, uint32 from src.types.trunk_block import TrunkBlock from src.types.full_block import FullBlock from src.consensus.pot_iterations import calculate_iterations, calculate_iterations_quality -from src.consensus.constants import ( - DIFFICULTY_STARTING, - DIFFICULTY_TARGET, - DIFFICULTY_EPOCH, - DIFFICULTY_DELAY, - DIFFICULTY_WARP_FACTOR, - DIFFICULTY_FACTOR, - NUMBER_OF_TIMESTAMPS, - MAX_FUTURE_TIME, - GENESIS_BLOCK -) +from src.consensus.constants import constants as consensus_constants + log = logging.getLogger(__name__) @@ -39,29 +30,24 @@ class ReceiveBlockResult(Enum): class Blockchain: - def __init__(self, genesis: Optional[FullBlock] = None): - if not genesis: - try: - genesis = self.get_genesis_block() - except ValueError: - raise ValueError("Failed to parse genesis block.") + def __init__(self, override_constants: Dict = {}): + # Allow passing in custom overrides for any consesus parameters + self.constants: Dict = consensus_constants + for key, value in override_constants.items(): + self.constants[key] = value self.heads: List[FullBlock] = [] self.lca_block: FullBlock = None self.blocks: Dict[bytes32, FullBlock] = {} self.height_to_hash: Dict[uint64, bytes32] = {} - result = self.receive_block(genesis) + self.genesis = 
FullBlock.from_bytes(self.constants["GENESIS_BLOCK"]) + result = self.receive_block(self.genesis) assert result == ReceiveBlockResult.ADDED_TO_HEAD - self.genesis = genesis - # For blocks with height % DIFFICULTY_DELAY == 1, a link to the hash of - # the (DIFFICULTY_DELAY)-th parent of this block + # For blocks with height % constants["DIFFICULTY_DELAY"] == 1, a link to the hash of + # the (constants["DIFFICULTY_DELAY"])-th parent of this block self.header_warp: Dict[bytes32, bytes32] = {} - @staticmethod - def get_genesis_block() -> FullBlock: - return FullBlock.from_bytes(GENESIS_BLOCK) - def get_current_heads(self) -> List[TrunkBlock]: """ Return the heads. @@ -135,7 +121,7 @@ class Blockchain: if trunk is None: raise Exception("No block found for given header_hash") elif trunk is self.genesis.trunk_block: - return uint64(DIFFICULTY_STARTING) + return uint64(self.constants["DIFFICULTY_STARTING"]) prev_block = self.blocks.get(trunk.prev_header_hash, None) if prev_block is None: @@ -144,51 +130,82 @@ class Blockchain: - prev_block.trunk_block.challenge.total_weight) def get_next_difficulty(self, header_hash: bytes32) -> uint64: - return self.get_difficulty(header_hash) - # Returns the difficulty of the next block that extends onto header_hash. # Used to calculate the number of iterations. 
- # TODO: Assumes header_hash is of a connected block - block = self.blocks.get(header_hash, None) + next_height: uint32 = block.height + 1 if block is None: raise Exception("Given header_hash must reference block already added") - if block.height % DIFFICULTY_EPOCH != DIFFICULTY_DELAY: + if next_height % self.constants["DIFFICULTY_EPOCH"] != self.constants["DIFFICULTY_DELAY"]: # Not at a point where difficulty would change return self.get_difficulty(header_hash) - elif block.height == DIFFICULTY_DELAY: - return uint64(DIFFICULTY_FACTOR * DIFFICULTY_STARTING) + elif next_height < self.constants["DIFFICULTY_EPOCH"]: + # We are in the first epoch + return uint64(self.constants["DIFFICULTY_STARTING"]) - # The current block has height i + DELAY. + # old diff curr diff new diff + # ----------|-----|----------------------|-----|-----... + # h1 h2 h3 i-1 + height1 = uint64(next_height - self.constants["DIFFICULTY_EPOCH"] - self.constants["DIFFICULTY_DELAY"] - 1) + height2 = uint64(next_height - self.constants["DIFFICULTY_EPOCH"] - 1) + height3 = uint64(next_height - self.constants["DIFFICULTY_DELAY"] - 1) + + block1, block2, block3 = None, None, None + if block.trunk_block not in self.get_current_heads() or height3 not in self.height_to_hash: + # This means we are either on a fork, or on one of the chains, but after the LCA, + # so we manually backtrack. 
+ curr = block + while (curr.height not in self.height_to_hash or self.height_to_hash[curr.height] != curr.header_hash): + if curr.height == height1: + block1 = curr + elif curr.height == height2: + block2 = curr + elif curr.height == height3: + block3 = curr + curr = self.blocks[curr.prev_header_hash] + # Once we are before the fork point (and before the LCA), we can use the height_to_hash map + if not block1 and height1 >= 0: + # hiehgt1 could be -1, for the first difficulty calculation + block1 = self.blocks[self.height_to_hash[height1]] + if not block2: + block2 = self.blocks[self.height_to_hash[height2]] + if not block3: + block3 = self.blocks[self.height_to_hash[height3]] + + # Current difficulty parameter (diff of block h = i - 1) Tc = self.get_difficulty(header_hash) - warp = header_hash - for _ in range(DIFFICULTY_DELAY - 1): - warp = self.blocks[warp].hash - # warp: header_hash of height {i + 1} - warp2 = warp - for _ in range(DIFFICULTY_WARP_FACTOR - 1): - warp2 = self.header_warp.get(warp2, None) - # warp2: header_hash of height {i + 1 - EPOCH + DELAY} - Tp = self.get_difficulty(self.blocks[warp2].prev_header_hash) + # Previous difficulty parameter (diff of block h = i - 2048 - 1) + Tp = self.get_difficulty(block2.header_hash) + if block1: + timestamp1 = block1.trunk_block.header.data.timestamp # i - 512 - 1 + else: + # In the case of height == -1, there is no timestamp here, so assume the genesis block + # took constants["BLOCK_TIME_TARGET"] seconds to mine. 
+ timestamp1 = (self.blocks[self.height_to_hash[uint64(0)]].trunk_block.header.data.timestamp + - self.constants["BLOCK_TIME_TARGET"]) + timestamp2 = block2.trunk_block.header.data.timestamp # i - 2048 + 512 - 1 + timestamp3 = block3.trunk_block.header.data.timestamp # i - 512 - 1 - # X_i : timestamp of i-th block, (EPOCH divides i) - # Current block @warp is i+1 - temp_block = self.blocks[warp] - timestamp1 = temp_block.trunk_block.header.data.timestamp # X_{i+1} - temp_block = self.blocks[warp2] - timestamp2 = temp_block.trunk_block.header.data.timestamp # X_{i+1-EPOCH+DELAY} - temp_block = self.blocks[self.header_warp[temp_block.hash]] - timestamp3 = temp_block.trunk_block.header.data.timestamp # X_{i+1-EPOCH} + # Numerator fits in 128 bits, so big int is not necessary + # We multiply by the denominators here, so we only have one fraction in the end (avoiding floating point) + term1 = (self.constants["DIFFICULTY_DELAY"] * Tp * (timestamp3 - timestamp2) * + self.constants["BLOCK_TIME_TARGET"]) + term2 = ((self.constants["DIFFICULTY_WARP_FACTOR"] - 1) * (self.constants["DIFFICULTY_EPOCH"] - + self.constants["DIFFICULTY_DELAY"]) * Tc + * (timestamp2 - timestamp1) * self.constants["BLOCK_TIME_TARGET"]) - diff_natural = ( - (DIFFICULTY_EPOCH - DIFFICULTY_DELAY) * Tc * (timestamp2 - timestamp3) - ) - diff_natural += DIFFICULTY_DELAY * Tp * (timestamp1 - timestamp2) - diff_natural *= DIFFICULTY_TARGET - diff_natural //= (timestamp1 - timestamp2) * (timestamp2 - timestamp3) - difficulty = max(min(diff_natural, Tc * 4), Tc // 4) # truncated comparison - return difficulty + # Round down after the division + new_difficulty: uint64 = uint64((term1 + term2) // + (self.constants["DIFFICULTY_WARP_FACTOR"] * + (timestamp3 - timestamp2) * + (timestamp2 - timestamp1))) + + # Only change by a max factor, to prevent attacks, as in greenpaper, and must be at least 1 + if new_difficulty >= Tc: + return min(new_difficulty, uint64(self.constants["DIFFICULTY_FACTOR"] * Tc)) + else: + 
return max([uint64(1), new_difficulty, uint64(Tc // self.constants["DIFFICULTY_FACTOR"])]) def get_vdf_rate_estimate(self) -> Optional[uint64]: """ @@ -250,18 +267,18 @@ class Blockchain: last_timestamps: List[uint64] = [] prev_block: Optional[FullBlock] = self.blocks[block.prev_header_hash] curr = prev_block - while len(last_timestamps) < NUMBER_OF_TIMESTAMPS: + while len(last_timestamps) < self.constants["NUMBER_OF_TIMESTAMPS"]: last_timestamps.append(curr.trunk_block.header.data.timestamp) try: curr = self.blocks[curr.prev_header_hash] except KeyError: break - if len(last_timestamps) != NUMBER_OF_TIMESTAMPS and curr.trunk_block.challenge.height != 0: + if len(last_timestamps) != self.constants["NUMBER_OF_TIMESTAMPS"] and curr.body.coinbase.height != 0: return False prev_time: uint64 = uint64(sum(last_timestamps) / len(last_timestamps)) if block.trunk_block.header.data.timestamp < prev_time: return False - if block.trunk_block.header.data.timestamp > time.time() + MAX_FUTURE_TIME: + if block.trunk_block.header.data.timestamp > time.time() + self.constants["MAX_FUTURE_TIME"]: return False else: prev_block: Optional[FullBlock] = None @@ -300,7 +317,7 @@ class Blockchain: return False # 10. Check coinbase amount - if calculate_block_reward(block.trunk_block.challenge.height) != block.body.coinbase.amount: + if calculate_block_reward(block.body.coinbase.height) != block.body.coinbase.amount: return False # 11. Check coinbase signature with pool pk @@ -328,7 +345,7 @@ class Blockchain: if not genesis: difficulty: uint64 = self.get_next_difficulty(block.prev_header_hash) else: - difficulty: uint64 = uint64(DIFFICULTY_STARTING) + difficulty: uint64 = uint64(self.constants["DIFFICULTY_STARTING"]) # 2. Check proof of space hash if block.trunk_block.proof_of_space.get_hash() != block.trunk_block.challenge.proof_of_space_hash: @@ -344,7 +361,8 @@ class Blockchain: return False # 4. 
Check PoT - if not block.trunk_block.proof_of_time.is_valid() and not genesis: + #TODO(Florin): Change the hardcode of the genesis with new pot format. + if not block.trunk_block.proof_of_time.is_valid(self.constants["DISCRIMINANT_SIZE_BITS"]) and not genesis: return False if block.body.coinbase.height != block.trunk_block.challenge.height: @@ -424,24 +442,7 @@ class Blockchain: while len(self.heads) >= 4: self.heads.sort(key=lambda b: b.weight, reverse=True) self.heads.pop() - log.info(f"Updated heads, new heights: {[b.height for b in self.heads]}") + log.info(f"\tUpdated heads, new heights: {[b.height for b in self.heads]}") self._reconsider_lca() return True return False - - def _get_warpable_trunk(self, trunk: TrunkBlock) -> TrunkBlock: - height = trunk.challenge.height - while height % DIFFICULTY_DELAY != 1: - trunk = self.blocks[trunk.header.header_hash].trunk_block - height = trunk.challenge.height - return trunk - - def _consider_warping_link(self, trunk: TrunkBlock): - # Assumes trunk is already connected - if trunk.challenge.height % DIFFICULTY_DELAY != 1: - return - warped_trunk: TrunkBlock = self.blocks[trunk.prev_header_hash].trunk_block - while warped_trunk and warped_trunk.challenge.height % DIFFICULTY_DELAY != 1: - warped_trunk = self.blocks.get(warped_trunk.prev_header_hash, None).trunk_block - if warped_trunk is not None: - self.header_warp[trunk.header.header_hash] = warped_trunk.header.header_hash diff --git a/src/consensus/__init__.py b/src/consensus/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/consensus/block_rewards.py b/src/consensus/block_rewards.py index 982f3416..f8703dbc 100644 --- a/src/consensus/block_rewards.py +++ b/src/consensus/block_rewards.py @@ -2,5 +2,8 @@ from src.util.ints import uint64, uint32 def calculate_block_reward(height: uint32) -> uint64: + """ + Returns the coinbase reward at a certain block height. 
# TODO: implement real block schedule + """ return uint64(10) diff --git a/src/consensus/constants.py b/src/consensus/constants.py index efe36dc2..d188057f 100644 --- a/src/consensus/constants.py +++ b/src/consensus/constants.py @@ -1,18 +1,31 @@ -NUMBER_OF_HEADS = 3 # The number of tips each full node keeps track of and propagates -DIFFICULTY_STARTING = 60 # These are in units of 2^32 -DIFFICULTY_EPOCH = 10 # The number of blocks per epoch -DIFFICULTY_TARGET = 10 # The target number of seconds per block -DIFFICULTY_FACTOR = 4 # The next difficulty is truncated to range [prev / FACTOR, prev * FACTOR] -DIFFICULTY_WARP_FACTOR = 4 # DELAY divides EPOCH in order to warp efficiently. -DIFFICULTY_DELAY = DIFFICULTY_EPOCH // DIFFICULTY_WARP_FACTOR # The delay in blocks before the difficulty reset applies -DISCRIMINANT_SIZE_BITS = 1024 +constants = { + "NUMBER_OF_HEADS": 3, # The number of tips each full node keeps track of and propagates + "DIFFICULTY_STARTING": 20, # These are in units of 2^32 + "BLOCK_TIME_TARGET": 10, # The target number of seconds per block + "DIFFICULTY_FACTOR": 4, # The next difficulty is truncated to range [prev / FACTOR, prev * FACTOR] -# The percentage of the difficulty target that the VDF must be run for, at a minimum -MIN_BLOCK_TIME_PERCENT = 20 -MIN_VDF_ITERATIONS = 1 # These are in units of 2^32 + # These 3 constants must be changed at the same time + "DIFFICULTY_EPOCH": 12, # The number of blocks per epoch + "DIFFICULTY_WARP_FACTOR": 4, # DELAY divides EPOCH in order to warp efficiently. 
+ "DIFFICULTY_DELAY": 3, # EPOCH / WARP_FACTOR -MAX_FUTURE_TIME = 7200 # The next block can have a timestamp of at most these many seconds more -NUMBER_OF_TIMESTAMPS = 11 # Than the average of the last NUMBEBR_OF_TIMESTAMPS blocks + "DISCRIMINANT_SIZE_BITS": 1024, -# Hardcoded genesis block, generated using block tools -GENESIS_BLOCK = b'\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x13\x00\x00\x00\x98\xf9\xeb\x86\x90Kj\x01\x1cZk_\xe1\x9c\x03;Z\xb9V\xe2\xe8\xa5\xc8\n\x0c\xbbU\xa6\xc5\xc5\xbcH\xa3\xb3fd\xcd\xb8\x83\t\xa9\x97\x96\xb5\x91G \xb2\x9e\x05\\\x91\xe1<\xee\xb1\x06\xc3\x18~XuI\xc8\x8a\xb5b\xd7.7\x96Ej\xf3DThs\x18s\xa5\xd4C\x1ea\xfd\xd5\xcf\xb9o\x18\xea6n\xe22*\xb0]%\x15\xd0i\x83\xcb\x9a\xa2.+\x0f1\xcd\x03Z\xf3]\'\xbf|\x8b\xa6\xbcF\x10\xe8Q\x19\xaeZ~\xe5\x1f\xf1)\xa3\xfb\x82\x1a\xb8\x12\xce\x19\xc8\xde\xb9n\x08[\xef\xfd\xf9\x0c\xec\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/u\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00F\x172\xd9\xb50\x13\xd8\x99\xa7\x88UA)\xec\x0e\xc3//\xb15\n)z\xb6\xf8\x96kTpU\t+Q\xf1\x95\xe8\xd8\x1e\xcd\xe4RrVs\xb8\xee<5^\xf4\xbc\x0bA\x99\xa6\xeb\x95\xf7u\x89G\xd2\xfe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x01\xf3\x05\x1b&\x8f%m\x15_\x8c\xec\x1f\x038W\xc9
\xec\xe5\xf0\xe4Hn\xa6\xe2\x81Yh $?\xb6D\xb1\xa1\xef\x9fP~\x9a\x88\x15s\xb6\xe8\xdd\n\xa5\xb7~\xd7E\xfe\x1c?\xd2@\x87\x97\xf0\'\xf3\x17\x03\x00\x00\x03\x8e\x00\x15\xf0>\x9d\x9ef\x86\x86\xe2\xb4\xe0zt\xee\x86mX+L\xc4\xd7U/\xcc\x12\x8c\x81\x1a(\x17\x05\xcdIc\x066\xe8\xe2\xe1#Z\xb6\xe1\xd4b\xd3\x9b\x17(\x08r\xb0P\x02\xae\xa7>eO\x97-.\xd3X\x00\x00\x9a\xae\x8d\xd5\xcc\xd6\xc5\x80\xcc\xc2}V\xfc\xac\xcdAl\x97\xd0\xc3\x93:\xb6\xeb7t\x17O\xfb$\x01\xea\xa2\x13\xab=bk\x84|\xc4W\xac"\x1f<\x8d\x02@\x94\xa4f~\x89}\xfbsP\xd4\xaaE[\xb1\x00nT\x91\xc2\xd7D\x99f\xb3A\xbc\xdap\xf7\x1b\xd6\x93\xb8\xe8\x81\x96\x0f\xc1;\x85~\xba\xd5w&7\x17J\xec\xf3\x02\xff\x83\x1aHS\xd9g\xd0\xc1\xcef\xa1\xb6bj\xb1\xc5oR\xf6`\xe7\xe5\x97\xc7\xee\x83\x03\x98\x14\x9f%\x9c\x93\xad\xd8^\x9bx\x00.9T\x1fVo\x98\xb6\xd9\x99x\xc7\x859K\x12\xec\xc3\xe5\xf9\xb5+\xe0\x01\xed\xab\xd6\xcd\x1d[7\xe2\xf5\x11^\xad\xefPl\x8cC7\xd4y\xba\xc1j\x00\x15\x05\xf6K\xac\xfa!\x89\xf0\x88z"t\xc3\x121\x000`s\xaeS\xd0O}\xd9k\xe9\x96jC\xb0,\xfa\x086*Q)\x8f\x1a.\r#h\xb3\xf5T\xc4\xa5l$u\xcd}\xae"\xde\xdbO\xb2{\xdc\x1eQ\x8b\xb5\x9bKp\xa3cO\xf2\xbde\xe91\x16\xdc\x9d\xff\xef\xa6{\x8c\x04\xd1\r\x1d\xc8\xd8\x97\xd0\xee2\xfe\xc0\xfa\x0c\xf5\xb4n\xe6|r\xd8\x88\xee\x8cQ\x9bX\x1f\n\xcf\xb2Yh\x01\'6j\xfd\xe7\x0c\xbc\xb1\xa2dy\t\xf1\xc1\x03 
\xcf\xbcX\xfb3H\x9fM2\xa7\x00\x11\x871\xfb\xc1\xd7M]\xfb\xfeo\xb6\xceBt\x9c\xd5\xc7d\xee\xe7\xbf\x0c\x08\x96\xd7S\x056\xf90\xf8\xc4\xf6!\xc1N\xf5\x9e@;\x81\xc6\xca\xe1\xa4*=r\xd0\xd3/U\xc3\x14\x99g\xb5\x96\xa4(\x1ek\xf4\xff\xfb$;\xb7\x134\x19Nmb\xc4\x04\xec\x02\xd0\xc2%\xf5L\xb5N\xc0\n\x8e=R\xdem\xd9%\xa6\xec\x01A\x14-\xdb\xa4Iz.;J\xd4<\xf3\xab\xfd9\x92$\x05\xe9\x1b\xc0\x0b\x13\xcd\xfd[\x9c\xc1\x97\x8f\x00H\xce\x90@s\n\xeb\x90\xd2\xfdtvle\xc2\x19E&K\x8e\xbf\xea\x96\xea6i\xff\x96\x83\xe0\x93\xe3\xa0?\xb5h\xff/\x96\x9f\xdb\xce\xe76\x8df\xe0\x02\xd0\xc9\xfca\xd4\xc2\xd3\xc8\xaf\xe7\xeb\xe0\xc3\xbd;\x13\xc7Lf\x0c"\x1aC?qX\xc3\xd1jY\tZ\x01\x13\x81R\xcb\n\xd3\tv\x8a\x10\xba\xa6\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<\x00\x00\x00\x00\x00\x00/u\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\x89\x94\xda\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00~[u\x1f\x81\x7f\x0c)\x05\xe6\xfd\xe5\xd14\\a\n\xc6I\xccJ\x0cXk\xcf,Z\x1c\xdb>\xe0\xc3z!\xc9N\xd5\x03\x8b^\xd9\xe6\xc7I\xba\xb1\x0fm\xd4\xa0=\xb6^s\x94_f\xb5\xc1\\n\xfe\xf9\xd2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H1V\xdeN\xcc\x8a;\x1a\x8b\xe6v\x9d\x82U\xfc?\xba2K\xdfiE\xfd\x16\xe6t\x90\x86\x14;\x1aT>\xed>u\xe8P\x87\xdf7i|/\xbf\x9a3\x10\x8e\xe0\xa9p\xc3\xdcd\x86\'A\x17:6\xc2\xdc\xe1\xc7b\x9f\xe0\xbe\xd1\xfb\x8eG\xfe?\xde\xee]\xee\x1f711L\t\x0b\xbcG+\xa8b\x0e\\O\xe5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n+\x93\xa0\x02\xe4\xc2\x1d\xaa5R\xe5,\xbd\xa5\x15|%}\xa4@\xe5\x11\x00\x80\x1fG\x8aH\x0b\xe7\xe9\x10\xd3tK\xda`\xb5u\xca\x8c\xa2\xf7n\x1d\xd5\x92l\xb13k\xdb\n+\xbe/\x1e\xc0\xfe\xbf\xd9\x83\x88V\x11]~.<\x14\x0f\xce`\x8b\xbf\xb9\xa7\xce"6\x19\xa5\x19|\x81!r\x15V\xa6\x82\x07\x96w\x98F\xce\xb2(G\xcfm\x17@t\xb2\x1b\xba\xcf4I}\x0b\xc4\n\xd4\x9b\xe2E\x9e\x84\x98mY||\
xa8[+\x93\xa0\x02\xe4\xc2\x1d\xaa5R\xe5,\xbd\xa5\x15|%}\xa4@\xe5\x11\x00\x80\x1fG\x8aH\x0b\xe7\xe9\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' # noqa: E501 + # The percentage of the difficulty target that the VDF must be run for, at a minimum + "MIN_BLOCK_TIME_PERCENT": 20, + "MIN_VDF_ITERATIONS": 1, # These are in units of 2^32 + + "MAX_FUTURE_TIME": 7200, # The next block can have a timestamp of at most these many seconds more + "NUMBER_OF_TIMESTAMPS": 11, # Than the average of the last NUMBEBR_OF_TIMESTAMPS blocks + + # If an unfinished block is more than these many seconds slower than the best unfinished block, + # don't propagate it. + "PROPAGATION_THRESHOLD": 1800, + # If the expected time is more than these seconds, slightly delay the propagation of the unfinished + # block, to allow better leaders to be released first. This is a slow block. + "PROPAGATION_DELAY_THRESHOLD": 600, + + # Hardcoded genesis block, generated using block tools + # GENESIS_BLOCK = b'\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x14\x00\x00\x00\xa0_\x11\x18\x8d3\xa1\x8b\x8b1Q1@Z 6Q\xb4\xba\xafn{\x1c\xb5\xd7\xa4\xd9{\x93\xa1KB 
\xd2\x9fxK\xd1n\xa0wN\xfd&\nw\xbb7tm$/7\xa0f%\xf6\xd4\xc5\x1c\x98\xef\xb0\xd0\x10D\x10\x1a\x9b\xc3\xf8xd\x9d\xab\xaa>\xff\x7f\x84E\t.\xe5gz\\\x9a|\xdeE\x93\xe1\xba\xb9\xd0E\x1f\x9f\xc6\xb7\x89/\x0e8)\x1f\xdd\xc0\xa7\xa5|\xf0\\\xdf\xf9\xd1\xdbZm\xe6\xcb\xa5|F\xc1\xa3\x89\x87L\x14\xb8\xd9\xe82gIB\xe4\x14\x01q\x15r\xc1"E\x99\xc4\x10+\x0b^\xed?F\x01\x00Cs\x1a\x01\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x00\x00\x00\x00\x00\x00\tH\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x035*\x14\xda\xd9\xbbqE\xc6\xa1\x00\\\x8a^&\xc0\xec2\xa1\x16s\x0f\x8b\xe9\xbd\x0c\xe5\xca\x8fO\x06\x10\xfa\x85V!\xe9\xf6S\x97lu\xe6J\xcd\xb6\xfe\xa1F6g\xf2\xa6\xa6-\xa9~\x7f\x80:bGs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xf3\xe8G5o&\xb6\x16\xf5\xe7n\xb9-\xebeO-0+\xbe\xc5\x96\xe3\x0f\x1be_<:\xed\xaa\xe8\x80\xbb\xa0Z\x1b>\xa1\x87(\xe9\xba\x08\xdf\xfe\x83n\xe1r\x9aUQ\xe5z9\xd8+D\xd5H\x11\xdd\x03\x00\x00\x03\x8e\x00Cr\xfa\x1c\x06\xb5\xd1\xcd\x8e\xf8\xdc\xbd\x19\xb4 
\xb3\x19$\x0fsF\xd1\xbew\xad\x14\xab@\xdf\xc2\x14[\xef\xf0\xd536)\xf5\xfcN\x10\xfbK\xc3\xaeu\x01\xc0\xc8\x1e\x8e\x95:hf\xea?\\MSH\xb8\x88\x00/\x1f[!D\xfc\xb7w\x04\xf8L\xdd\x8b\x06:\xd4\xe7\xf5\xfcR\x11\\Ra}\xa7\x9aH\x9b0\x05\x9f\x80\xf5_3\xfd\xb8\x89R\xa7\xe4\xc0R\x17\xe9B\x1c7S#\xd4\xb1\x8a4zJ\xee\xb1\x01\xce\xd4\x0e\xff\x00EY\x90\x0bV\x8d0\xba\x8d\xf33e8\xe7\x9a\xa5Y~\x08\x19r\xcfP\x88\x8d\xbc\xd3TE\xedWc\x14\xc5-\x1b\xbc\x9e\xbf\xde\xa8\x1b\x90U\xa7\xdc.\xa8\xd6\xe6\'\xf1\x03\x89\xf8\t7\xfez\x02\xda\xae\xc3\xa6\xff\xfc\x07\x9a\xfb\xa6\xcf6\xa7\x8fP\x12\x17\xaa\x1f\xda\xae\xeaS\xac\xd2D\xa5\xe0\xc5\xe3\xab\'\x00\x84_\xabnZ\xf5\xd4\x10\xbd&\x16\xae\x1b{\xa0,%\x1f\xac\x08\x0b\r\xcb\xf7\xb0Ed\xa5h?\xb29^\xe1>\xed\x00O7\xbb\r$\xb5\x03\xaf\r\x0cy\\!_\xa1\xa3\xadE\xd5\x88\xe7\xef\x1d\x8c\x8a\xb0\x8bU\xde\x88\x01\xc0\xe0\xb5h\xe3\x94\x98c,j4\x18j\xe2\xd7\xa2\x05V\xce\xb9= \x05L\xbb\xcb\x9c\x99\xb6d\xa7\x1f\xff\xbb\xf1\xa3\x99q\xfdg\xc9\x89\xbc\xb8\xbdj\xc5hu\x0bZ\xe3\xa1\x7f\xcc\x0f\xfd\x10\xa1z\xe1\xcd\xb0\xcby=\x93:\xb5\xa7 \x07\xb6.\x07\x9c\xbaR\x97\xb1@\xf4V\xc0Qs\x06\x115x\x82\xb24@\xc4\xa5\x97\x00-\xc220\x85\xfd\x01\xd6\xfb\xe7oM\xa2~\xb8^\xa7\x13\xf5V\xb2\x84ax\x8c\x93H(!\x9a\xbb\xc8\xdb\x01\x0e\xc3l\xc1\xe2E\x92d\xc8B\xfdnt\x11\x11\xd9a\x8bP\x11\x87\xad\xedQ\xe0_\x1f>aR\x0f\xc5\xa0v\x16\x16\xf6\x94[g\xf6\xb6W\xc3\xc1\xfd\xff\xfc\x05$\x13h\x08\xdd\x97\xf4\xffQdS\xe5\x00\xdd\x9f{`\x91`\x88\xe9\xf5\xed\x8e.\xa4\x81\r\xd4\x80\x81\xc5]\x1b\xb5\xa3\x15\xde\xb9\x86\x9d\xcen&\xd3\xea\xf0SP\x84\xac\x9cqo9)\xcc\x114\xab\x973\x01\xad\xb1SA\x1e\xe5\x1d\x94\xd7\xfb\xb2\xf5:\x82[\xfb7\xfe\x81\x83\x1e\x1bIMT\xb7\nRIk\xb6]O\x97.y\xf5\xf1]\xc1\x89\xbf\x87\xcc\xa9,\x87\xc1\x13\xbcN&\xcc4\xf5h 
\x8d\xb1\xcd\xcc\xd1;2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1e\x00\x00\x00\x00\x00\x00\tH\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\x8c\x86s\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xad\xb1SA\x1e\xe5\x1d\x94\xd7\xfb\xb2\xf5:\x82[\xfb7\xfe\x81\x83\x1e\x1bIMT\xb7\nRIk\xb6]z!\xc9N\xd5\x03\x8b^\xd9\xe6\xc7I\xba\xb1\x0fm\xd4\xa0=\xb6^s\x94_f\xb5\xc1\\n\xfe\xf9\xd2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8.\x1a\xd1\xa9f/B\xa9\xaaO\xe7\xb8O\x87\x9d(\xb0]\xf2r0\xdb]\x03\x81B\xa4\x04\xfc\xc4\xfc~\xcd\xf4\xe5\xb0\xa8{<\xe3.\xc1g\x84Y{V\x06\x1c\x15\xaf\xa47rD\xab\xb1-\xc9\x86\xbf&q\xf4\xc2_\xb3\x05\xa8\xb7\xbf\xb4\x0e\x7f\x85\xfa\xa1\xd3\xc6pS\xc6:\x13\xea2La0\xcf\xe35\xa2\xf1R\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\n+\x93\xa0\x02\xe4\xc2\x1d\xaa5R\xe5,\xbd\xa5\x15|%}\xa4@\xe5\x11\x00\x80\x1fG\x8aH\x0b\xe7\xe9\x10\xd3tK\xda`\xb5u\xca\x8c\xa2\xf7n\x1d\xd5\x92l\xb13k\xdb\n+\xbe/\x1e\xc0\xfe\xbf\xd9\x83\x88V\x11]~.<\x14\x0f\xce`\x8b\xbf\xb9\xa7\xce"6\x19\xa5\x19|\x81!r\x15V\xa6\x82\x07\x96w\x98F\xce\xb2(G\xcfm\x17@t\xb2\x1b\xba\xcf4I}\x0b\xc4\n\xd4\x9b\xe2E\x9e\x84\x98mY||\xa8[+\x93\xa0\x02\xe4\xc2\x1d\xaa5R\xe5,\xbd\xa5\x15|%}\xa4@\xe5\x11\x00\x80\x1fG\x8aH\x0b\xe7\xe9\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' # noqa: E501 + "GENESIS_BLOCK": 
b'\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x15N3\xd3\xf9H\xc2K\x96\xfe\xf2f\xa2\xbf\x87\x0e\x0f,\xd0\xd4\x0f6s\xb1".\\\xf5\x8a\xb4\x03\x84\x8e\xf9\xbb\xa1\xca\xdef3:\xe4?\x0c\xe5\xc6\x12\x80\x14\x00\x00\x00\xa0_\x11\x18\x8d3\xa1\x8b\x8b1Q1@Z 6Q\xb4\xba\xafn{\x1c\xb5\xd7\xa4\xd9{\x93\xa1KB \xd2\x9fxK\xd1n\xa0wN\xfd&\nw\xbb7tm$/7\xa0f%\xf6\xd4\xc5\x1c\x98\xef\xb0\xd0\x10D\x10\x1a\x9b\xc3\xf8xd\x9d\xab\xaa>\xff\x7f\x84E\t.\xe5gz\\\x9a|\xdeE\x93\xe1\xba\xb9\xd0E\x1f\x9f\xc6\xb7\x89/\x0e8)\x1f\xdd\xc0\xa7\xa5|\xf0\\\xdf\xf9\xd1\xdbZm\xe6\xcb\xa5|F\xc1\xa3\x89\x87L\x14\xb8\xd9\xe82gIB\xe4\x14\x01q\x15r\xc1"E\x99\xc4\x10+\x0b^\xed?F\x01\x00Cs\x1a\x01\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x00\x00\x00\x00\x00\x00\x060\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00[^:j\x1bH\xe9\xc5\xe4\xe0.\xaf\x00\xaan\xa3\x8a\x12\x85\x00\xdf\xd8\xbe.\xd6\xe3\xcc\xec\xab4\x8b3\xf7 
#T;a\xf4\xd4\x05\xb7\xf8B\x08\x1dc\x184\x07\x86MQ?N\x82\xd5t"\xd6\x1dL\xfa\xcf\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x1e\xac\x12\xc97`p\x037\xc6\x17_\x81\xc7\x93\x85\x84\x91\xce\xf2_\xe6&\ri\xbcx\xb8T\x06l\x1e\xed\xbeS30T"Dk\xd2\x8e\x1e\xac7\x19Y\x94\x9f\xe8Lb\xd5\x1e%%l\xb7[\xb4A\xc7\x03\x00\x00\x03\x8e\x00P\x1e\xce\x92\xbb\x8bcWOopup\xe7"\xfb\xc1\x0e\xfd\x00\xb3U\xef\x07\xa4\x14\xbd\xdaw\xa1h\xd20.\x06\xc8\'\xe3d\x89LU\x1e\xdf\xb7\xeao\x9a\x0eZ@s3[\xc6\x0b\x90\xf5\xb1GKK\\\xfd\x00=\xf4\x9ai\xf6\xf3\n\x8fx\x9d\xf8\x859\x85\x90I\x84\xa8qO\x1d\xdcy\x1f*\x83\xea"\xfc\t\x02\x1cx\xe8\xba\xcf\x15\xbe\x1dM\x8bEU\x03\'d\xf6\xb5b\xc7X\xf2\xfc>G\xfa\xf7I\xd4h<\xa9Y\xc7\x00\t\x1c~\t\xa8\x03R\xf5\t\x83\x99\\T\x02\xf8$\x06\x88\x16\xa2\'\xb1\x95K+"\x9c\x9eU\xe8\x00!\xcbr4\xd8\xc0\xae\xa9\x86\xe0m\xef\x16\xfa\x7f \xdd&(\xe5Rj\xa6x\xab`\xebvQ\x19/\x96\xc1\xff\xfc\xb3\x14\xe6#\xc6\x02\xa8,n\xfd\x92\xfb\x82D\xc7\xe9\x94\xc4\xac\xdc\xf0O\x9d\x15\t\x8f\xa9In%\xe4q\xd3\x1a\r\xe2\x1e7b\x86\xd2\xb0=C\xfe<\xe3&\xa8\x13\xb1vl\x9f\x1f\x9c\x06\xf3\x98zi\xc4\xc3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\t\x1c~\t\xa8\x03R\xf5\t\x83\x99\\T\x02\xf8$\x06\x88\x16\xa2\'\xb1\x95K+"\x9c\x9eU\xe8\x00!\xcbr4\xd8\xc0\xae\xa9\x86\xe0m\xef\x16\xfa\x7f 
\xdd&(\xe5Rj\xa6x\xab`\xebvQ\x19/\x96\xc1\xff\xfc\xb3\x14\xe6#\xc6\x02\xa8,n\xfd\x92\xfb\x82D\xc7\xe9\x94\xc4\xac\xdc\xf0O\x9d\x15\t\x8f\xa9In%\xe4q\xd3\x1a\r\xe2\x1e7b\x86\xd2\xb0=C\xfe<\xe3&\xa8\x13\xb1vl\x9f\x1f\x9c\x06\xf3\x98zi\xc4\xc3\x00\x1dP\x98\xaaB\x9fu\x83\xca3\xe1)\xff8\xbc\x92\x19N6\xab\x9dp\xb1E\x99N/}\x9a\xef\xdf\xe1\x8bX\r`>\xc7Y{]\xf09\xd6\x82\t\xb8U\xca\xa4\xc2\x15\xcb\r\xf7@\x1aMM\xe9/q2\xb9\xff\xef\x05\xc1\xfa\xd5\x1c\xf2\'\xc6\xf5\x11\xe7\x12\x11\\\xe6CO?Ii\xff+JB\xfdd]>DS\xf3\x88;\xa4\xa1\xd3ZA\xf5\xf9\x0c\xe3\x9f\x1b\xa8\x99\xebE\x98\xe9T),V\xd3P?N\x85\x04H\xa9\xd1\x000\xae|\xcc\x02\x9f\x86[\x0f\x19\x94vt\x92\xb7\x80\x90\xbd(\xe5\xf8\x95T\xa1)\xb7\x95`?\xb0&\xce\xb3\x10\xaa0\xfe|\\\x0f;z,\xed\x98i\x02\xb7\xde\'9B`RYS\x05q\xfa0g\x81\xca\xf9\x00\x00\xb3\xaf2\xb8\x12\x0f\x7f\x81\xcazE\xda\x82\xa2x\xbaM\xa4\xe9\xfd\xae\x96\x85\xe8\xcdv\xb82d\xc7\xbb\xae!yS\x10\xd1\xb9AU7\x9c\xb9\xf0\x0f\xb2\xb0\x02\xfc(\x1bd\xef\x04\xda\xed.\xfen\x9b!\xbb]\x00^\x99@\xef\xab\x96\xaaDy\xbaB\xde\x96\xae/\xbe\xa4q\xb0xr.\xd2\xcc\xcd4\xaf\x8df\xe5j\x0c\x98\xc8>dz\x04PC{\xa0\xac\xa6\x9f\xc31\xb3j\xfa\x89\xc3u$\x16\x87\xe7\xf3\x9c\xc8D5L$\xff\xfa\xf3u\xcd|\x1a9\x94\xa0\xca\x1acw\xd8\x7f\xb9\xca\x98\xc69Z\x89\xefZ|\xc5K\x96U\xa4\xcb\x8e\x11\xf7M\x14V\xb4\x9d:`g\xd9O\x9c\nt\xbf%\x1c\xad\xfd]\xf7t\xe8~|\x8a\x16\xb5\x89\x98a\x01\xad\xb1SA\x1e\xe5\x1d\x94\xd7\xfb\xb2\xf5:\x82[\xfb7\xfe\x81\x83\x1e\x1bIMT\xb7\nRIk\xb6]\xcf{+\xdd\x80;g\x12\x81%6\xde\tx\x90\xe72\x96\xe8m\xda\xba\xe3@\x01\xd0\xd0#\xbe-\t\xe8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x060\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00]\x8dV4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xad\xb1SA\x1e\xe5\x1d\x94\xd7\xfb\xb2\xf5:\x82[\xfb7\xfe\x81\x83\x1e\x1bIMT\xb7\nRIk\xb6]z!\xc9N\xd5\x03\x8b^\xd9\xe6\xc7I\x
ba\xb1\x0fm\xd4\xa0=\xb6^s\x94_f\xb5\xc1\\n\xfe\xf9\xd2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc2B\x15\xbd\xb4\x02n\x03~\x1fK$\xf7\xe0|\xb1\x9a-Mg\xac\xc4\x8c%R\x08j|\x1d1\x8cB,\xc9\xbd\xe2\xf1\r\x8c\x0c\x0bO{b\xf7\xee\xe6e\x04>\xab\xba8\xde\x1eu\xc6\xae\x0e bool: """ Verifies whether the weight of the tip is valid or not. Naiveley, looks at every block from genesis, verifying proof of space, proof of time, and difficulty resets. + # TODO: implement """ + for height, block in enumerate(proof_blocks): + if not block.height == height: + return False + return True diff --git a/src/farmer.py b/src/farmer.py index 06e47799..90fdebbb 100644 --- a/src/farmer.py +++ b/src/farmer.py @@ -206,7 +206,7 @@ async def proof_of_space_finalized(proof_of_space_finalized: farmer_protocol.Pro coinbase_signature: PrependSignature = pool_sks[0].sign_prepend(coinbase.serialize()) db.coinbase_rewards[uint32(db.current_height + 1)] = (coinbase, coinbase_signature) - log.info(f"Current height set to {db.current_height}") + log.info(f"\tCurrent height set to {db.current_height}") db.seen_challenges.add(proof_of_space_finalized.challenge_hash) if proof_of_space_finalized.height not in db.challenges: db.challenges[proof_of_space_finalized.height] = [proof_of_space_finalized] diff --git a/src/full_node.py b/src/full_node.py index 86eb5077..12faac72 100644 --- a/src/full_node.py +++ b/src/full_node.py @@ -1,5 +1,4 @@ import logging -from src.util.errors import BlockNotInBlockchain, PeersDontHaveBlock import time import asyncio import collections @@ -8,11 +7,11 @@ import concurrent from secrets import token_bytes from hashlib import sha256 from chiapos import Verifier -from blspy import Util, Signature, PrivateKey +from blspy import Signature, PrivateKey from asyncio import Lock, sleep, Event from typing import Dict, List, Tuple, Optional, AsyncGenerator, Counter from src.util.api_decorators import 
api_request -from src.util.ints import uint64 +from src.util.ints import uint64, uint32 from src.util import errors from src.protocols import farmer_protocol from src.protocols import timelord_protocol @@ -27,27 +26,31 @@ from src.types.full_block import FullBlock from src.types.fees_target import FeesTarget from src.consensus.weight_verifier import verify_weight from src.consensus.pot_iterations import calculate_iterations -from src.consensus.constants import DIFFICULTY_TARGET +from src.consensus.constants import constants from src.blockchain import Blockchain, ReceiveBlockResult from src.server.outbound_message import OutboundMessage, Delivery, NodeType, Message +from src.util.errors import BlockNotInBlockchain, PeersDontHaveBlock, InvalidUnfinishedBlock class Database: + # This protects all other resources lock: Lock = Lock() - blockchain: Blockchain = Blockchain() # Should be stored in memory - full_blocks: Dict[str, FullBlock] = {Blockchain.get_genesis_block().trunk_block.header.header_hash: - Blockchain.get_genesis_block()} + blockchain: Blockchain = Blockchain() + full_blocks: Dict[str, FullBlock] = { + FullBlock.from_bytes(constants["GENESIS_BLOCK"]).trunk_block.header.header_hash: + FullBlock.from_bytes(constants["GENESIS_BLOCK"])} sync_mode: bool = True # Block headers and blocks which we think might be heads, but we haven't verified yet. + # All these are used during sync mode potential_heads: Counter[bytes32] = collections.Counter() potential_heads_full_blocks: Dict[bytes32, FullBlock] = collections.Counter() # Headers/trunks downloaded for the during sync, by height - potential_trunks: Dict[uint64, TrunkBlock] = {} + potential_trunks: Dict[uint32, TrunkBlock] = {} # Blocks downloaded during sync, by height - potential_blocks: Dict[uint64, FullBlock] = {} + potential_blocks: Dict[uint32, FullBlock] = {} # Event, which gets set whenever we receive the block at each height. Waited for by sync(). 
- potential_blocks_received: Dict[uint64, Event] = {} + potential_blocks_received: Dict[uint32, Event] = {} # These are the blocks that we created, but don't have the PoS from farmer yet, # keyed from the proof of space hash @@ -55,7 +58,10 @@ class Database: # These are the blocks that we created, have PoS, but not PoT yet, keyed from the # block header hash - unfinished_blocks: Dict[Tuple[bytes32, int], FullBlock] = {} + unfinished_blocks: Dict[Tuple[bytes32, uint64], FullBlock] = {} + # Latest height with unfinished blocks, and expected timestamp of the finishing + unfinished_blocks_leader: Tuple[uint32, uint64] = (uint32(0), uint64(9999999999)) + proof_of_time_estimate_ips: uint64 = uint64(1500) @@ -108,7 +114,7 @@ async def proof_of_time_estimate_interval(): async with db.lock: if estimated_ips is not None: db.proof_of_time_estimate_ips = estimated_ips - log.info(f"Updated proof of time estimate to {estimated_ips} iterations per second.") + log.info(f"Updated proof of time estimate to {estimated_ips} iterations per second.") await sleep(config['update_pot_estimate_interval']) @@ -187,7 +193,7 @@ async def sync(): if height not in db.potential_trunks: received_all_trunks = False break - local_trunks.append(db.potential_trunks[uint64(height)]) + local_trunks.append(db.potential_trunks[uint32(height)]) if received_all_trunks: trunks = local_trunks break @@ -210,12 +216,12 @@ async def sync(): if not have_block: request = peer_protocol.RequestSyncBlocks(tip_block.trunk_block.header.header_hash, [height]) async with db.lock: - db.potential_blocks_received[uint64(height)] = Event() + db.potential_blocks_received[uint32(height)] = Event() found = False for _ in range(30): yield OutboundMessage(NodeType.FULL_NODE, Message("request_sync_blocks", request), Delivery.RANDOM) try: - await asyncio.wait_for(db.potential_blocks_received[uint64(height)].wait(), timeout=2) + await asyncio.wait_for(db.potential_blocks_received[uint32(height)].wait(), timeout=2) found = True 
break except concurrent.futures._base.TimeoutError: @@ -227,7 +233,7 @@ async def sync(): if have_block: block = db.potential_heads_full_blocks[trunks[height].header.get_hash()] else: - block = db.potential_blocks[uint64(height)] + block = db.potential_blocks[uint32(height)] start = time.time() db.blockchain.receive_block(block) @@ -353,6 +359,7 @@ async def request_header_hash(request: farmer_protocol.RequestHeaderHash) -> Asy if head.challenge.get_hash() == request.challenge_hash: target_head = head if target_head is None: + # TODO: should we still allow the farmer to farm? log.warning(f"Challenge hash: {request.challenge_hash} not in one of three heads") return @@ -405,11 +412,6 @@ async def header_signature(header_signature: farmer_protocol.HeaderSignature) -> assert block_header_data.get_hash() == header_signature.header_hash - # Verifies the plotter's signature - # TODO: remove redundant checks after they are added to Blockchain class - assert header_signature.header_signature.verify([Util.hash256(header_signature.header_hash)], - [pos.plot_pubkey]) - block_header: BlockHeader = BlockHeader(block_header_data, header_signature.header_signature) trunk: TrunkBlock = TrunkBlock(pos, None, None, block_header) unfinished_block_obj: FullBlock = FullBlock(trunk, block_body) @@ -468,7 +470,7 @@ async def new_proof_of_time(new_proof_of_time: peer_protocol.NewProofOfTime) -> if (new_proof_of_time.proof.output.challenge_hash, new_proof_of_time.proof.output.number_of_iterations) in db.unfinished_blocks: finish_block = True - elif new_proof_of_time.proof.is_valid(): + elif new_proof_of_time.proof.is_valid(constants["DISCRIMINANT_SIZE_BITS"]): propagate_proof = True if finish_block: request = timelord_protocol.ProofOfTimeFinished(new_proof_of_time.proof) @@ -476,7 +478,8 @@ async def new_proof_of_time(new_proof_of_time: peer_protocol.NewProofOfTime) -> yield msg if propagate_proof: # TODO: perhaps don't propagate everything, this is a DoS vector - yield 
OutboundMessage(NodeType.FULL_NODE, Message("new_proof_of_time", new_proof_of_time), Delivery.BROADCAST) + yield OutboundMessage(NodeType.FULL_NODE, Message("new_proof_of_time", new_proof_of_time), + Delivery.BROADCAST_TO_OTHERS) @api_request @@ -490,7 +493,9 @@ async def unfinished_block(unfinished_block: peer_protocol.UnfinishedBlock) -> A if not db.blockchain.is_child_of_head(unfinished_block.block): return - # TODO(alex): verify block using blockchain class, including coinbase rewards + if not db.blockchain.validate_unfinished_block(unfinished_block.block): + raise InvalidUnfinishedBlock() + prev_block: TrunkBlock = db.blockchain.get_trunk_block( unfinished_block.block.trunk_block.prev_header_hash) @@ -502,14 +507,29 @@ async def unfinished_block(unfinished_block: peer_protocol.UnfinishedBlock) -> A challenge_hash, difficulty) if (challenge_hash, iterations_needed) in db.unfinished_blocks: - log.info(f"Have already seen unfinished block {(challenge_hash, iterations_needed)}") + log.info(f"\tHave already seen unfinished block {(challenge_hash, iterations_needed)}") return - expected_time: float = iterations_needed / db.proof_of_time_estimate_ips + expected_time: uint64 = uint64(iterations_needed / db.proof_of_time_estimate_ips) - # TODO(alex): tweak this - log.info(f"Expected finish time: {expected_time}") - if expected_time > 10 * DIFFICULTY_TARGET: + if expected_time > constants["PROPAGATION_DELAY_THRESHOLD"]: + # If this block is slow, sleep to allow faster blocks to come out first + await asyncio.sleep(2) + + async with db.lock: + if unfinished_block.block.height > db.unfinished_blocks_leader[0]: + # If this is the first block we see at this height, propagate + db.unfinished_blocks_leader = (unfinished_block.block.height, expected_time) + elif unfinished_block.block.height == db.unfinished_blocks_leader[0]: + if expected_time > db.unfinished_blocks_leader[1] + constants["PROPAGATION_THRESHOLD"]: + # If VDF is expected to finish X seconds later than the best, 
don't propagate + return + elif expected_time < db.unfinished_blocks_leader[1]: + # If this will be the first block to finalize, update our leader + db.unfinished_blocks_leader = (db.unfinished_blocks_leader[0], expected_time) + else: + # If we have seen an unfinished block at a greater or equal height, don't propagate + # TODO: should we? return db.unfinished_blocks[(challenge_hash, iterations_needed)] = unfinished_block.block @@ -534,15 +554,14 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N db.potential_heads_full_blocks[header_hash] = block.block return - if header_hash in db.full_blocks: - log.info(f"Already have block {header_hash} height {block.block.trunk_block.challenge.height}") - return - # TODO(alex): Check if we care about this block, we don't want to add random - # disconnected blocks. For example if it's on one of the heads, or if it's an older - # block that we need added: ReceiveBlockResult = db.blockchain.receive_block(block.block) - if not (added == ReceiveBlockResult.ADDED_TO_HEAD or added == ReceiveBlockResult.ADDED_AS_ORPHAN): + if added == ReceiveBlockResult.ALREADY_HAVE_BLOCK: + log.info(f"\tAlready have block {header_hash} height {block.block.trunk_block.challenge.height}") + return + elif added == ReceiveBlockResult.INVALID_BLOCK: + log.warning(f"\tBlock {header_hash} at height {block.block.trunk_block.challenge.height} is invalid.") + elif added == ReceiveBlockResult.DISCONNECTED_BLOCK: async with db.lock: tip_height = max([head.challenge.height for head in db.blockchain.get_current_heads()]) if block.block.trunk_block.challenge.height > tip_height + config["sync_blocks_behind_threshold"]: @@ -555,8 +574,8 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N # Perform a sync if we have to db.sync_mode = True try: + # Performs sync, and catch exceptions so we don't close the connection async for msg in sync(): - log.error(f"Yielding {msg}") yield msg except 
asyncio.CancelledError: log.warning("Syncing failed") @@ -577,24 +596,26 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N async with db.lock: db.full_blocks[header_hash] = block.block - difficulty = db.blockchain.get_difficulty(header_hash) + if added == ReceiveBlockResult.ADDED_TO_HEAD: + # Only propagate blocks which extend the blockchain (one of the heads) + difficulty = db.blockchain.get_difficulty(header_hash) - pos_quality = block.block.trunk_block.proof_of_space.verify_and_get_quality( - block.block.trunk_block.proof_of_time.output.challenge_hash - ) - farmer_request = farmer_protocol.ProofOfSpaceFinalized(block.block.trunk_block.challenge.get_hash(), - block.block.trunk_block.challenge.height, - pos_quality, - difficulty) - timelord_request = timelord_protocol.ChallengeStart(block.block.trunk_block.challenge.get_hash()) - timelord_request_end = timelord_protocol.ChallengeStart(block.block.trunk_block.proof_of_time. - output.challenge_hash) - # Tell timelord to stop previous challenge and start with new one - yield OutboundMessage(NodeType.TIMELORD, Message("challenge_end", timelord_request_end), Delivery.BROADCAST) - yield OutboundMessage(NodeType.TIMELORD, Message("challenge_start", timelord_request), Delivery.BROADCAST) + pos_quality = block.block.trunk_block.proof_of_space.verify_and_get_quality( + block.block.trunk_block.proof_of_time.output.challenge_hash + ) + farmer_request = farmer_protocol.ProofOfSpaceFinalized(block.block.trunk_block.challenge.get_hash(), + block.block.trunk_block.challenge.height, + pos_quality, + difficulty) + timelord_request = timelord_protocol.ChallengeStart(block.block.trunk_block.challenge.get_hash()) + timelord_request_end = timelord_protocol.ChallengeStart(block.block.trunk_block.proof_of_time. 
+ output.challenge_hash) + # Tell timelord to stop previous challenge and start with new one + yield OutboundMessage(NodeType.TIMELORD, Message("challenge_end", timelord_request_end), Delivery.BROADCAST) + yield OutboundMessage(NodeType.TIMELORD, Message("challenge_start", timelord_request), Delivery.BROADCAST) - # Tell full nodes about the new block - yield OutboundMessage(NodeType.FULL_NODE, Message("block", block), Delivery.BROADCAST_TO_OTHERS) + # Tell full nodes about the new block + yield OutboundMessage(NodeType.FULL_NODE, Message("block", block), Delivery.BROADCAST_TO_OTHERS) - # Tell farmer about the new block - yield OutboundMessage(NodeType.FARMER, Message("proof_of_space_finalized", farmer_request), Delivery.BROADCAST) + # Tell farmer about the new block + yield OutboundMessage(NodeType.FARMER, Message("proof_of_space_finalized", farmer_request), Delivery.BROADCAST) diff --git a/src/plotter.py b/src/plotter.py index 4a076cc0..ca2076b9 100644 --- a/src/plotter.py +++ b/src/plotter.py @@ -76,7 +76,7 @@ async def new_challenge(new_challenge: plotter_protocol.NewChallenge): quality_strings = prover.get_qualities_for_challenge(new_challenge.challenge_hash) for index, quality_str in enumerate(quality_strings): quality = ProofOfSpace.quality_str_to_quality(new_challenge.challenge_hash, quality_str) - db.challenge_hashes[quality] = (new_challenge.challenge_hash, filename, index) + db.challenge_hashes[quality] = (new_challenge.challenge_hash, filename, uint8(index)) response: plotter_protocol.ChallengeResponse = plotter_protocol.ChallengeResponse( new_challenge.challenge_hash, quality, diff --git a/src/protocols/__init__.py b/src/protocols/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/protocols/farmer_protocol.py b/src/protocols/farmer_protocol.py index 36afec9a..597446c7 100644 --- a/src/protocols/farmer_protocol.py +++ b/src/protocols/farmer_protocol.py @@ -9,10 +9,7 @@ from src.types.coinbase import CoinbaseInfo Protocol between 
farmer and full node. """ -""" -Farmer <- Full node -Update current height -""" + @cbor_message(tag=2000) class ProofOfSpaceFinalized: challenge_hash: bytes32 diff --git a/src/protocols/peer_protocol.py b/src/protocols/peer_protocol.py index 5266b052..24056f1b 100644 --- a/src/protocols/peer_protocol.py +++ b/src/protocols/peer_protocol.py @@ -14,17 +14,33 @@ Protocol between full nodes. """ -Receive a transaction from a peer. +Receive a transaction id from a peer. """ @cbor_message(tag=4000) -class NewTransaction: +class TransactionId: + transaction_id: bytes32 + + +""" +Request a transaction from a peer. +""" +@cbor_message(tag=4001) +class RequestTransaction: + transaction_id: bytes32 + + +""" +Receive a transaction from a peer. +""" +@cbor_message(tag=4002) +class Transaction: transaction: Transaction """ Receive a new proof of time from a peer. """ -@cbor_message(tag=4001) +@cbor_message(tag=4003) class NewProofOfTime: proof: ProofOfTime @@ -32,7 +48,7 @@ class NewProofOfTime: """ Receive an unfinished block from a peer. """ -@cbor_message(tag=4002) +@cbor_message(tag=4004) class UnfinishedBlock: # Block that does not have ProofOfTime and Challenge block: FullBlock @@ -41,7 +57,7 @@ class UnfinishedBlock: """ Requests a block from a peer. """ -@cbor_message(tag=4003) +@cbor_message(tag=4005) class RequestBlock: header_hash: bytes32 @@ -49,7 +65,7 @@ class RequestBlock: """ Receive a block from a peer. """ -@cbor_message(tag=4004) +@cbor_message(tag=4006) class Block: block: FullBlock @@ -57,7 +73,7 @@ class Block: """ Return full list of peers """ -@cbor_message(tag=4005) +@cbor_message(tag=4007) class RequestPeers: pass @@ -65,7 +81,7 @@ class RequestPeers: """ Update list of peers """ -@cbor_message(tag=4006) +@cbor_message(tag=4008) class Peers: peer_list: List[PeerInfo] @@ -73,7 +89,7 @@ class Peers: """ Request trunks of blocks that are ancestors of the specified tip. 
""" -@cbor_message(tag=4007) +@cbor_message(tag=4009) class RequestTrunkBlocks: tip_header_hash: bytes32 heights: List[uint64] @@ -82,7 +98,7 @@ class RequestTrunkBlocks: """ Sends trunk blocks that are ancestors of the specified tip, at the specified heights. """ -@cbor_message(tag=4008) +@cbor_message(tag=4010) class TrunkBlocks: tip_header_hash: bytes32 trunk_blocks: List[TrunkBlock] @@ -91,7 +107,7 @@ class TrunkBlocks: """ Request download of blocks, in the blockchain that has 'tip_header_hash' as the tip """ -@cbor_message(tag=4009) +@cbor_message(tag=4011) class RequestSyncBlocks: tip_header_hash: bytes32 heights: List[uint64] @@ -100,7 +116,7 @@ class RequestSyncBlocks: """ Send blocks to peer. """ -@cbor_message(tag=4010) +@cbor_message(tag=4012) class SyncBlocks: tip_header_hash: bytes32 blocks: List[FullBlock] diff --git a/src/server/__init__.py b/src/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/server/server.py b/src/server/server.py index ae0fd474..0970ef49 100644 --- a/src/server/server.py +++ b/src/server/server.py @@ -226,7 +226,7 @@ async def initialize_pipeline(aiter, # length encoding and CBOR serialization async def serve_forever(): async for connection, message in expanded_messages_aiter: - log.info(f"Sending {message.function} to peer {connection.get_peername()}") + log.info(f"-> {message.function} to peer {connection.get_peername()}") try: await connection.send(message) except asyncio.CancelledError: diff --git a/src/server/start_farmer.py b/src/server/start_farmer.py index af120382..d0e47131 100644 --- a/src/server/start_farmer.py +++ b/src/server/start_farmer.py @@ -9,7 +9,7 @@ from src.protocols.plotter_protocol import PlotterHandshake from src.server.outbound_message import OutboundMessage, Message, Delivery, NodeType from src.util.network import parse_host_port -logging.basicConfig(format='Farmer %(name)-23s: %(levelname)-8s %(message)s', level=logging.INFO) +logging.basicConfig(format='Farmer 
%(name)-25s: %(levelname)-8s %(message)s', level=logging.INFO) async def main(): diff --git a/src/server/start_plotter.py b/src/server/start_plotter.py index 910e42dd..ccb22d34 100644 --- a/src/server/start_plotter.py +++ b/src/server/start_plotter.py @@ -5,7 +5,7 @@ from src.server.outbound_message import NodeType from src.util.network import parse_host_port from src import plotter -logging.basicConfig(format='Plotter %(name)-23s: %(levelname)-8s %(message)s', level=logging.INFO) +logging.basicConfig(format='Plotter %(name)-24s: %(levelname)-8s %(message)s', level=logging.INFO) async def main(): diff --git a/src/server/start_timelord.py b/src/server/start_timelord.py index 4158648b..e54dbe5c 100644 --- a/src/server/start_timelord.py +++ b/src/server/start_timelord.py @@ -5,7 +5,7 @@ from src.server.outbound_message import NodeType from src.util.network import parse_host_port from src import timelord -logging.basicConfig(format='Timelord %(name)-23s: %(levelname)-8s %(message)s', level=logging.INFO) +logging.basicConfig(format='Timelord %(name)-25s: %(levelname)-20s %(message)s', level=logging.INFO) async def main(): host, port = parse_host_port(timelord) diff --git a/src/simulation/simulate_network.sh b/src/simulation/simulate_network.sh index b471d7c0..fc6495d0 100755 --- a/src/simulation/simulate_network.sh +++ b/src/simulation/simulate_network.sh @@ -13,7 +13,7 @@ python -m src.server.start_full_node "127.0.0.1" 8005 & P6=$! _term() { - echo "Caught SIGTERM signal!" + echo "Caught SIGTERM signal, killing all servers." 
kill -TERM "$P1" 2>/dev/null kill -TERM "$P2" 2>/dev/null kill -TERM "$P3" 2>/dev/null @@ -23,4 +23,6 @@ _term() { } trap _term SIGTERM -wait $P1 $P2 $P3 $P4 $P5 $P6 +trap _term SIGINT +trap _term INT +wait $P1 $P2 $P3 $P4 $P5 $P6 \ No newline at end of file diff --git a/src/timelord.py b/src/timelord.py index 89aa4a03..dc5f6b23 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -15,7 +15,7 @@ from src.protocols import timelord_protocol from src.types.proof_of_time import ProofOfTimeOutput, ProofOfTime from src.types.classgroup import ClassgroupElement from src.util.ints import uint8 -from src.consensus import constants +from src.consensus.constants import constants from src.server.outbound_message import OutboundMessage, Delivery, Message, NodeType class Database: @@ -39,7 +39,7 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): forever. """ - disc: int = create_discriminant(challenge_start.challenge_hash, constants.DISCRIMINANT_SIZE_BITS) + disc: int = create_discriminant(challenge_start.challenge_hash, constants["DISCRIMINANT_SIZE_BITS"]) async with db.lock: if challenge_start.challenge_hash in db.done_discriminants: diff --git a/src/types/proof_of_time.py b/src/types/proof_of_time.py index 861d31ed..f3c73e75 100644 --- a/src/types/proof_of_time.py +++ b/src/types/proof_of_time.py @@ -3,7 +3,6 @@ from src.util.streamable import streamable from src.types.sized_bytes import bytes32 from src.types.classgroup import ClassgroupElement from src.util.ints import uint8, uint64 -from src.consensus import constants from lib.chiavdf.inkfish.proof_of_time import check_proof_of_time_nwesolowski from lib.chiavdf.inkfish.create_discriminant import create_discriminant from lib.chiavdf.inkfish.classgroup import ClassGroup @@ -22,13 +21,13 @@ class ProofOfTime: witness_type: uint8 witness: List[uint8] - def is_valid(self): + def is_valid(self, discriminant_size_bits): disc: int = create_discriminant(self.output.challenge_hash, - 
constants.DISCRIMINANT_SIZE_BITS) + discriminant_size_bits) x = ClassGroup.from_ab_discriminant(2, 1, disc) y = ClassGroup.from_ab_discriminant(self.output.output.a, self.output.output.b, disc) return check_proof_of_time_nwesolowski(disc, x, y.serialize() + bytes(self.witness), self.output.number_of_iterations, - constants.DISCRIMINANT_SIZE_BITS, + discriminant_size_bits, self.witness_type) diff --git a/src/util/__init__.py b/src/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/util/api_decorators.py b/src/util/api_decorators.py index ab1a9f72..550e7b31 100644 --- a/src/util/api_decorators.py +++ b/src/util/api_decorators.py @@ -18,8 +18,6 @@ def api_request(f): binding = sig.bind(*args, **kwargs) binding.apply_defaults() inter = dict(binding.arguments) - print_args = {k: v for (k, v) in inter.items() if k != "source_connection" - and k != "all_connections"} - log.info(f"{f.__name__}({print_args})"[:200]) + log.info(f"<- {f.__name__}") return f(**inter) return f_substitute diff --git a/src/util/errors.py b/src/util/errors.py index bdee87f5..a07a5fd3 100644 --- a/src/util/errors.py +++ b/src/util/errors.py @@ -42,3 +42,8 @@ class PeersDontHaveBlock(Exception): class InvalidWeight(Exception): """The weight of this block can not be validated""" pass + + +class InvalidUnfinishedBlock(Exception): + """The unfinished block we received is invalid""" + pass diff --git a/tests/block_tools.py b/tests/block_tools.py index 4d7f6055..95e8d558 100644 --- a/tests/block_tools.py +++ b/tests/block_tools.py @@ -14,7 +14,7 @@ from src.types.block_header import BlockHeader, BlockHeaderData from src.types.proof_of_space import ProofOfSpace from src.types.proof_of_time import ProofOfTime, ProofOfTimeOutput from src.types.classgroup import ClassgroupElement -from src.consensus import constants, pot_iterations, block_rewards +from src.consensus import pot_iterations, block_rewards from src.util.ints import uint64, uint32, uint8 from src.util.errors import 
NoProofsOfSpaceFound from src.types.coinbase import CoinbaseInfo @@ -22,16 +22,22 @@ from src.types.fees_target import FeesTarget from lib.chiavdf.inkfish.create_discriminant import create_discriminant from lib.chiavdf.inkfish.classgroup import ClassGroup from lib.chiavdf.inkfish.proof_of_time import create_proof_of_time_nwesolowski +from src.consensus.constants import constants +# Can't go much lower than 19, since plots start having no solutions +k = 19 +# Uses many plots for testing, in order to guarantee blocks at every height +num_plots = 80 # Use the empty string as the seed for the private key -sk: PrivateKey = PrivateKey.from_seed(b'') -pool_pk: PublicKey = sk.get_public_key() -plot_pk: PublicKey = sk.get_public_key() -coinbase_target = sha256(sk.get_public_key().serialize()).digest() -fee_target = sha256(sk.get_public_key().serialize()).digest() -k = 20 -num_plots = 4 +pool_sk: PrivateKey = PrivateKey.from_seed(b'') +pool_pk: PublicKey = pool_sk.get_public_key() +plot_sks: List[PrivateKey] = [PrivateKey.from_seed(pn.to_bytes(4, "big")) for pn in range(num_plots)] +plot_pks: List[PublicKey] = [sk.get_public_key() for sk in plot_sks] + +farmer_sk: PrivateKey = PrivateKey.from_seed(b'coinbase') +coinbase_target = sha256(farmer_sk.get_public_key().serialize()).digest() +fee_target = sha256(farmer_sk.get_public_key().serialize()).digest() n_wesolowski = 3 @@ -41,58 +47,117 @@ class BlockTools: """ def __init__(self): - self.plot_seed: bytes32 = ProofOfSpace.calculate_plot_seed(pool_pk, plot_pk) - self.filenames: List[str] = [os.path.join("tests", "plots", "genesis-plot-" + str(k) + + plot_seeds: List[bytes32] = [ProofOfSpace.calculate_plot_seed(pool_pk, plot_pk) for plot_pk in plot_pks] + self.filenames: List[str] = [os.path.join("tests", "plots", "genesis-plots-" + str(k) + sha256(int.to_bytes(i, 4, "big")).digest().hex() + ".dat") for i in range(num_plots)] try: - for filename in self.filenames: + for pn, filename in enumerate(self.filenames): if not 
os.path.exists(filename): plotter = DiskPlotter() - plotter.create_plot_disk(filename, k, b"genesis", self.plot_seed) + plotter.create_plot_disk(filename, k, b"genesis", plot_seeds[pn]) except KeyboardInterrupt: for filename in self.filenames: if os.path.exists(filename): os.remove(filename) sys.exit(1) - def get_consecutive_blocks(self, num_blocks: int) -> List[FullBlock]: + def get_consecutive_blocks(self, + num_blocks: int, + difficulty=constants["DIFFICULTY_STARTING"], + discriminant_size=constants["DISCRIMINANT_SIZE_BITS"], + seconds_per_block=constants["BLOCK_TIME_TARGET"]) -> List[FullBlock]: for i in range(100): block_list = [] try: - block_list.append(self.create_genesis_block(bytes([i]*32))) - for _ in range(num_blocks - 1): - block_list.append(self.create_next_block(block_list[-1])) + block_list.append(self.create_genesis_block(bytes([i]*32), difficulty, discriminant_size)) + prev_difficulty = difficulty + curr_difficulty = difficulty + timestamp = block_list[0].trunk_block.header.data.timestamp + for next_height in range(1, num_blocks): + if (next_height > constants["DIFFICULTY_EPOCH"] and + next_height % constants["DIFFICULTY_EPOCH"] == constants["DIFFICULTY_DELAY"]): + # Calculates new difficulty + height1 = uint64(next_height - (constants["DIFFICULTY_EPOCH"] + + constants["DIFFICULTY_DELAY"]) - 1) + height2 = uint64(next_height - (constants["DIFFICULTY_EPOCH"]) - 1) + height3 = uint64(next_height - (constants["DIFFICULTY_DELAY"]) - 1) + if height1 >= 0: + timestamp1 = block_list[height1].trunk_block.header.data.timestamp + else: + timestamp1 = (block_list[0].trunk_block.header.data.timestamp - + constants["BLOCK_TIME_TARGET"]) + timestamp2 = block_list[height2].trunk_block.header.data.timestamp + timestamp3 = block_list[height3].trunk_block.header.data.timestamp + term1 = (constants["DIFFICULTY_DELAY"] * prev_difficulty * + (timestamp3 - timestamp2) * constants["BLOCK_TIME_TARGET"]) + + term2 = ((constants["DIFFICULTY_WARP_FACTOR"] - 1) * + 
(constants["DIFFICULTY_EPOCH"] - constants["DIFFICULTY_DELAY"]) * curr_difficulty + * (timestamp2 - timestamp1) * constants["BLOCK_TIME_TARGET"]) + + # Round down after the division + new_difficulty: uint64 = uint64((term1 + term2) // + (constants["DIFFICULTY_WARP_FACTOR"] * + (timestamp3 - timestamp2) * + (timestamp2 - timestamp1))) + + if new_difficulty >= curr_difficulty: + new_difficulty = min(new_difficulty, uint64(constants["DIFFICULTY_FACTOR"] * + curr_difficulty)) + else: + new_difficulty = max([uint64(1), new_difficulty, + uint64(curr_difficulty // constants["DIFFICULTY_FACTOR"])]) + + prev_difficulty = curr_difficulty + curr_difficulty = new_difficulty + time_taken = seconds_per_block + timestamp += time_taken + block_list.append(self.create_next_block(block_list[-1], timestamp, curr_difficulty, + discriminant_size)) return block_list except NoProofsOfSpaceFound: pass raise NoProofsOfSpaceFound - def create_genesis_block(self, challenge_hash=bytes([0]*32)) -> FullBlock: + def create_genesis_block(self, challenge_hash=bytes([0]*32), difficulty=constants["DIFFICULTY_STARTING"], + discriminant_size=constants["DISCRIMINANT_SIZE_BITS"]) -> FullBlock: return self._create_block( challenge_hash, uint32(0), bytes([0]*32), uint64(0), uint64(0), - uint64(constants.DIFFICULTY_STARTING) + uint64(time.time()), + uint64(difficulty), + discriminant_size ) - def create_next_block(self, prev_block: FullBlock) -> FullBlock: + def create_next_block(self, prev_block: FullBlock, timestamp: uint64, + difficulty=constants["DIFFICULTY_STARTING"], + discriminant_size=constants["DISCRIMINANT_SIZE_BITS"]) -> FullBlock: return self._create_block( prev_block.trunk_block.challenge.get_hash(), prev_block.height + 1, prev_block.header_hash, prev_block.trunk_block.challenge.total_iters, prev_block.weight, - uint64(constants.DIFFICULTY_STARTING)) + timestamp, + uint64(difficulty), + discriminant_size) def _create_block(self, challenge_hash: bytes32, height: uint32, prev_header_hash: 
bytes32, - prev_iters: uint64, prev_weight: uint64, difficulty: uint64) -> FullBlock: + prev_iters: uint64, prev_weight: uint64, timestamp: uint64, difficulty: uint64, + discriminant_size: uint64) -> FullBlock: prover = None + plot_pk = None + plot_sk = None qualities = [] - for filename in self.filenames: + for pn in range(num_plots): + filename = self.filenames[pn] + plot_pk = plot_pks[pn] + plot_sk = plot_sks[pn] prover = DiskProver(filename) qualities = prover.get_qualities_for_challenge(challenge_hash) if len(qualities) > 0: @@ -107,11 +172,10 @@ class BlockTools: number_iters: uint64 = pot_iterations.calculate_iterations(proof_of_space, challenge_hash, difficulty) - disc: int = create_discriminant(challenge_hash, constants.DISCRIMINANT_SIZE_BITS) + disc: int = create_discriminant(challenge_hash, discriminant_size) start_x: ClassGroup = ClassGroup.from_ab_discriminant(2, 1, disc) - y_cl, proof_bytes = create_proof_of_time_nwesolowski( - disc, start_x, number_iters, constants.DISCRIMINANT_SIZE_BITS, n_wesolowski) + disc, start_x, number_iters, disc, n_wesolowski) output = ProofOfTimeOutput(challenge_hash, number_iters, ClassgroupElement(y_cl[0], y_cl[1])) @@ -120,19 +184,17 @@ class BlockTools: coinbase: CoinbaseInfo = CoinbaseInfo(height, block_rewards.calculate_block_reward(uint32(height)), coinbase_target) - coinbase_sig: PrependSignature = sk.sign_prepend(coinbase.serialize()) + coinbase_sig: PrependSignature = pool_sk.sign_prepend(coinbase.serialize()) fees_target: FeesTarget = FeesTarget(fee_target, 0) body: BlockBody = BlockBody(coinbase, coinbase_sig, fees_target, None, bytes([0]*32)) - timestamp = uint64(time.time()) - header_data: BlockHeaderData = BlockHeaderData(prev_header_hash, timestamp, bytes([0]*32), proof_of_space.get_hash(), body.get_hash(), bytes([0]*32)) - header_hash_sig: PrependSignature = sk.sign_prepend(header_data.get_hash()) + header_hash_sig: PrependSignature = plot_sk.sign_prepend(header_data.get_hash()) header: BlockHeader = 
BlockHeader(header_data, header_hash_sig) @@ -146,3 +208,5 @@ class BlockTools: # print(create_genesis_block().serialize()) +# bt = BlockTools() +# print(bt.create_genesis_block(bytes([4]*32)).serialize()) diff --git a/tests/test_blockchain.py b/tests/test_blockchain.py index 6368ddb7..96fcce16 100644 --- a/tests/test_blockchain.py +++ b/tests/test_blockchain.py @@ -1,3 +1,4 @@ +from src.consensus.constants import constants import time import pytest from blspy import PrivateKey @@ -32,62 +33,67 @@ class TestBlockValidation(): @pytest.fixture(scope="module") def initial_blockchain(self): """ - Provides a list of 3 valid blocks, as well as a blockchain with 2 blocks added to it. + Provides a list of 10 valid blocks, as well as a blockchain with 9 blocks added to it. """ - blocks = bt.get_consecutive_blocks(3) - b: Blockchain = Blockchain(blocks[0]) - assert b.receive_block(blocks[1]) == ReceiveBlockResult.ADDED_TO_HEAD + blocks = bt.get_consecutive_blocks(10, 5, 16) + b: Blockchain = Blockchain({ + "GENESIS_BLOCK": blocks[0].serialize(), + "DIFFICULTY_STARTING": 5, + "DISCRIMINANT_SIZE_BITS": 16 + }) + for i in range(1, 9): + assert b.receive_block(blocks[i]) == ReceiveBlockResult.ADDED_TO_HEAD return (blocks, b) def test_prev_pointer(self, initial_blockchain): blocks, b = initial_blockchain block_bad = FullBlock(TrunkBlock( - blocks[2].trunk_block.proof_of_space, - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, + blocks[9].trunk_block.proof_of_space, + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, BlockHeader(BlockHeaderData( bytes([1]*32), - blocks[2].trunk_block.header.data.timestamp, - blocks[2].trunk_block.header.data.filter_hash, - blocks[2].trunk_block.header.data.proof_of_space_hash, - blocks[2].trunk_block.header.data.body_hash, - blocks[2].trunk_block.header.data.extension_data - ), blocks[2].trunk_block.header.plotter_signature) - ), blocks[2].body) + blocks[9].trunk_block.header.data.timestamp, + 
blocks[9].trunk_block.header.data.filter_hash, + blocks[9].trunk_block.header.data.proof_of_space_hash, + blocks[9].trunk_block.header.data.body_hash, + blocks[9].trunk_block.header.data.extension_data + ), blocks[9].trunk_block.header.plotter_signature) + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK def test_timestamp(self, initial_blockchain): blocks, b = initial_blockchain # Time too far in the past block_bad = FullBlock(TrunkBlock( - blocks[2].trunk_block.proof_of_space, - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, + blocks[9].trunk_block.proof_of_space, + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, BlockHeader(BlockHeaderData( - blocks[2].trunk_block.header.data.prev_header_hash, - blocks[2].trunk_block.header.data.timestamp - 1000, - blocks[2].trunk_block.header.data.filter_hash, - blocks[2].trunk_block.header.data.proof_of_space_hash, - blocks[2].trunk_block.header.data.body_hash, - blocks[2].trunk_block.header.data.extension_data - ), blocks[2].trunk_block.header.plotter_signature) - ), blocks[2].body) + blocks[9].trunk_block.header.data.prev_header_hash, + blocks[9].trunk_block.header.data.timestamp - 1000, + blocks[9].trunk_block.header.data.filter_hash, + blocks[9].trunk_block.header.data.proof_of_space_hash, + blocks[9].trunk_block.header.data.body_hash, + blocks[9].trunk_block.header.data.extension_data + ), blocks[9].trunk_block.header.plotter_signature) + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK # Time too far in the future block_bad = FullBlock(TrunkBlock( - blocks[2].trunk_block.proof_of_space, - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, + blocks[9].trunk_block.proof_of_space, + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, BlockHeader(BlockHeaderData( - blocks[2].trunk_block.header.data.prev_header_hash, + 
blocks[9].trunk_block.header.data.prev_header_hash, time.time() + 3600 * 3, - blocks[2].trunk_block.header.data.filter_hash, - blocks[2].trunk_block.header.data.proof_of_space_hash, - blocks[2].trunk_block.header.data.body_hash, - blocks[2].trunk_block.header.data.extension_data - ), blocks[2].trunk_block.header.plotter_signature) - ), blocks[2].body) + blocks[9].trunk_block.header.data.filter_hash, + blocks[9].trunk_block.header.data.proof_of_space_hash, + blocks[9].trunk_block.header.data.body_hash, + blocks[9].trunk_block.header.data.extension_data + ), blocks[9].trunk_block.header.plotter_signature) + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK @@ -95,65 +101,87 @@ class TestBlockValidation(): blocks, b = initial_blockchain # Time too far in the past block_bad = FullBlock(TrunkBlock( - blocks[2].trunk_block.proof_of_space, - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, + blocks[9].trunk_block.proof_of_space, + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, BlockHeader(BlockHeaderData( - blocks[2].trunk_block.header.data.prev_header_hash, - blocks[2].trunk_block.header.data.timestamp, - blocks[2].trunk_block.header.data.filter_hash, - blocks[2].trunk_block.header.data.proof_of_space_hash, + blocks[9].trunk_block.header.data.prev_header_hash, + blocks[9].trunk_block.header.data.timestamp, + blocks[9].trunk_block.header.data.filter_hash, + blocks[9].trunk_block.header.data.proof_of_space_hash, bytes([1]*32), - blocks[2].trunk_block.header.data.extension_data - ), blocks[2].trunk_block.header.plotter_signature) - ), blocks[2].body) + blocks[9].trunk_block.header.data.extension_data + ), blocks[9].trunk_block.header.plotter_signature) + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK def test_plotter_signature(self, initial_blockchain): blocks, b = initial_blockchain # Time too far in the past block_bad = FullBlock(TrunkBlock( - 
blocks[2].trunk_block.proof_of_space, - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, + blocks[9].trunk_block.proof_of_space, + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, BlockHeader( - blocks[2].trunk_block.header.data, + blocks[9].trunk_block.header.data, PrivateKey.from_seed(b'0').sign_prepend(b"random junk")) - ), blocks[2].body) + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK def test_invalid_pos(self, initial_blockchain): blocks, b = initial_blockchain - bad_pos = blocks[2].trunk_block.proof_of_space.proof + bad_pos = blocks[9].trunk_block.proof_of_space.proof bad_pos[0] = (bad_pos[0] + 1) % 256 # Proof of space invalid block_bad = FullBlock(TrunkBlock( ProofOfSpace( - blocks[2].trunk_block.proof_of_space.pool_pubkey, - blocks[2].trunk_block.proof_of_space.plot_pubkey, - blocks[2].trunk_block.proof_of_space.size, + blocks[9].trunk_block.proof_of_space.pool_pubkey, + blocks[9].trunk_block.proof_of_space.plot_pubkey, + blocks[9].trunk_block.proof_of_space.size, bad_pos ), - blocks[2].trunk_block.proof_of_time, - blocks[2].trunk_block.challenge, - blocks[2].trunk_block.header - ), blocks[2].body) + blocks[9].trunk_block.proof_of_time, + blocks[9].trunk_block.challenge, + blocks[9].trunk_block.header + ), blocks[9].body) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK def test_invalid_coinbase_height(self, initial_blockchain): blocks, b = initial_blockchain # Coinbase height invalid - block_bad = FullBlock(blocks[2].trunk_block, BlockBody( + block_bad = FullBlock(blocks[9].trunk_block, BlockBody( CoinbaseInfo( 3, - blocks[2].body.coinbase.amount, - blocks[2].body.coinbase.puzzle_hash + blocks[9].body.coinbase.amount, + blocks[9].body.coinbase.puzzle_hash ), - blocks[2].body.coinbase_signature, - blocks[2].body.fees_target_info, - blocks[2].body.aggregated_signature, - blocks[2].body.solutions_generator + blocks[9].body.coinbase_signature, + 
blocks[9].body.fees_target_info, + blocks[9].body.aggregated_signature, + blocks[9].body.solutions_generator )) assert b.receive_block(block_bad) == ReceiveBlockResult.INVALID_BLOCK + + def test_difficulty_change(self): + num_blocks = 20 + # Make it 5x faster than target time + blocks = bt.get_consecutive_blocks(num_blocks, 5, 16, 1) + b: Blockchain = Blockchain({ + "GENESIS_BLOCK": blocks[0].serialize(), + "DIFFICULTY_STARTING": 5, + "DISCRIMINANT_SIZE_BITS": 16, + "BLOCK_TIME_TARGET": 10, + "DIFFICULTY_EPOCH": 12, # The number of blocks per epoch + "DIFFICULTY_WARP_FACTOR": 4, # DELAY divides EPOCH in order to warp efficiently. + "DIFFICULTY_DELAY": 3 # EPOCH / WARP_FACTOR + }) + + for i in range(1, num_blocks): + assert b.receive_block(blocks[i]) == ReceiveBlockResult.ADDED_TO_HEAD + + assert b.get_difficulty(blocks[14].header_hash) == b.get_difficulty(blocks[13].header_hash) + assert b.get_difficulty(blocks[15].header_hash) > b.get_difficulty(blocks[14].header_hash) + assert ((b.get_difficulty(blocks[15].header_hash) / b.get_difficulty(blocks[14].header_hash) + <= constants["DIFFICULTY_FACTOR"])) From d00c4284dfa992dd5600775ef7f616b026483905 Mon Sep 17 00:00:00 2001 From: fchirica Date: Tue, 8 Oct 2019 19:14:53 +0300 Subject: [PATCH 09/13] More attemps to fix memory leaks --- lib/chiavdf/fast_vdf/vdf.cpp | 39 ++++++++++++----------------- lib/chiavdf/fast_vdf/vdf_original.h | 5 ++-- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 8f03555c..d52aaedc 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -99,18 +99,16 @@ public: ClassGroupContext *t; Reducer *reducer; - vdf_original vdfo; + vdf_original* vdfo; WesolowskiCallback(uint64_t expected_space) { - // = (form*) malloc(sizeof(struct form) * expected_space); - + vdfo = new vdf_original(); t=new ClassGroupContext(4096); reducer=new Reducer(*t); } ~WesolowskiCallback() { - //free(forms); - + 
delete(vdfo); delete(reducer); delete(t); } @@ -151,17 +149,6 @@ public: return &(forms[GetPosition(power)]); } - form GetFormFromCheckpoint(vdf_original &vdfo, int power) { - uint64 checkpoint = power - power % 100; - form checkpoint_form; - mpz_init(checkpoint_form.a.impl); - mpz_init(checkpoint_form.b.impl); - mpz_init(checkpoint_form.c.impl); - checkpoint_form = forms[GetPosition(checkpoint)]; - repeated_square_original(vdfo, checkpoint_form, D, L, 0, power % 100, NULL); - return checkpoint_form; - } - void OnIteration(int type, void *data, uint64 iteration) { iteration++; @@ -255,7 +242,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb #ifdef ENABLE_TRACK_CYCLES print( "track cycles enabled; results will be wrong" ); - repeated_square_original(weso.vdfo, f, D, L, 100); //randomize the a and b values + repeated_square_original(*weso.vdfo, f, D, L, 100); //randomize the a and b values #endif // This works single threaded @@ -276,7 +263,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb if (actual_iterations==~uint64(0)) { //corruption; f is unchanged. do the entire batch with the slow algorithm - repeated_square_original(weso.vdfo, f, D, L, num_iterations, batch_size, &weso); + repeated_square_original(*weso.vdfo, f, D, L, num_iterations, batch_size, &weso); actual_iterations=batch_size; #ifdef VDF_TEST @@ -292,7 +279,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb //the fast algorithm terminated prematurely for whatever reason. f is still valid //it might terminate prematurely again (e.g. 
gcd quotient too large), so will do one iteration of the slow algorithm //this will also reduce f if the fast algorithm terminated because it was too big - repeated_square_original(weso.vdfo, f, D, L, num_iterations+actual_iterations, 1, &weso); + repeated_square_original(*weso.vdfo, f, D, L, num_iterations+actual_iterations, 1, &weso); #ifdef VDF_TEST ++num_iterations_slow; @@ -312,7 +299,7 @@ void repeated_square(form f, const integer& D, const integer& L, WesolowskiCallb form f_copy_2=f; weso.reduce(f_copy_2); - repeated_square_original(weso.vdfo, f_copy, D, L, actual_iterations); + repeated_square_original(&weso.vdfo, f_copy, D, L, actual_iterations); assert(f_copy==f_copy_2); } #endif @@ -595,9 +582,15 @@ Proof CreateProofOfTimeWesolowski(integer& D, form x, int64_t num_iterations, ui if (stop_signal) return Proof(); - vdf_original vdfo; + vdf_original vdfo_proof; - form y = weso.GetFormFromCheckpoint(vdfo, done_iterations + num_iterations); + uint64 checkpoint = (done_iterations + num_iterations) - (done_iterations + num_iterations) % 100; + //mpz_init(y.a.impl); + //mpz_init(y.b.impl); + //mpz_init(y.c.impl); + form y = forms[weso.GetPosition(checkpoint)]; + repeated_square_original(vdfo_proof, y, D, L, 0, (done_iterations + num_iterations) % 100, NULL); + auto proof = GenerateProof(y, x_init, D, done_iterations, num_iterations, k, l, weso, stop_signal); if (stop_signal) @@ -687,8 +680,8 @@ void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallba bytes.insert(bytes.end(), result.y.begin(), result.y.end()); bytes.insert(bytes.end(), result.proof.begin(), result.proof.end()); std::string str_result = BytesToStr(bytes); - std::cout << "Generated proof = " << str_result << "\n"; std::lock_guard lock(socket_mutex); + std::cout << "Generated proof = " << str_result << "\n"; boost::asio::write(sock, boost::asio::buffer(str_result.c_str(), str_result.size())); } diff --git a/lib/chiavdf/fast_vdf/vdf_original.h 
b/lib/chiavdf/fast_vdf/vdf_original.h index d8cb3a57..d225d845 100644 --- a/lib/chiavdf/fast_vdf/vdf_original.h +++ b/lib/chiavdf/fast_vdf/vdf_original.h @@ -319,8 +319,7 @@ public: } ~vdf_original() { - /*mpz_clears(negative_a, r, denom, old_a, old_b, ra, s, x, g, d, e, q, w, m, - u, a, b, k, mu, v, sigma, lambda, f3.a, f3.b, f3.c); - */ + mpz_clears(negative_a, r, denom, old_a, old_b, ra, s, x, g, d, e, q, w, m, + u, a, b, k, mu, v, sigma, lambda, f3.a, f3.b, f3.c, NULL); //,); } }; From b1008895503bf01278bef6db46c96602e831eae5 Mon Sep 17 00:00:00 2001 From: fchirica Date: Wed, 9 Oct 2019 02:21:00 +0300 Subject: [PATCH 10/13] Hack to extend a block with multiple iters (free up VDF server) --- src/full_node.py | 9 +++++++++ src/timelord.py | 13 +++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/full_node.py b/src/full_node.py index 12faac72..8d12a75c 100644 --- a/src/full_node.py +++ b/src/full_node.py @@ -545,6 +545,7 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N """ Receive a full block from a peer full node (or ourselves). """ + header_hash = block.block.trunk_block.header.get_hash() async with db.lock: @@ -619,3 +620,11 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N # Tell farmer about the new block yield OutboundMessage(NodeType.FARMER, Message("proof_of_space_finalized", farmer_request), Delivery.BROADCAST) + else: + # Note(Florin): This is a hack... + log.info("I've received a block, stopping the challenge to free up the VDF server...") + log.info(f"Height of received block = {block.block.trunk_block.challenge.height}") + timelord_request_end = timelord_protocol.ChallengeStart(block.block.trunk_block.proof_of_time. 
+ output.challenge_hash) + yield OutboundMessage(NodeType.TIMELORD, Message("challenge_end", timelord_request_end), Delivery.BROADCAST) + diff --git a/src/timelord.py b/src/timelord.py index dc5f6b23..67e355d5 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -107,11 +107,11 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): response = timelord_protocol.ProofOfTimeFinished(proof_of_time) log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") - async with db.lock: - if (challenge_start.challenge_hash in db.solved_discriminants): - log.info("I've already propagated one proof... Ignoring for now...") - continue - db.solved_discriminants.append(challenge_start.challenge_hash) + #async with db.lock: + # if (challenge_start.challenge_hash in db.solved_discriminants): + # log.info("I've already propagated one proof... Ignoring for now...") + # continue + # db.solved_discriminants.append(challenge_start.challenge_hash) yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) except Exception as e: e_to_str = str(e) @@ -133,6 +133,7 @@ async def challenge_end(challenge_end: timelord_protocol.ChallengeEnd): await writer.drain() del db.active_discriminants[challenge_end.challenge_hash] db.done_discriminants.append(challenge_end.challenge_hash) + await asyncio.sleep(0.5) @api_request async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpaceInfo): @@ -153,4 +154,4 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac if (proof_of_space_info.challenge_hash in db.done_discriminants): log.info("Got iters for a finished challenge") return - await asyncio.sleep(3) + await asyncio.sleep(0.5) From 30305a72d2291db1111433e3c4ae6ccb29f06e47 Mon Sep 17 00:00:00 2001 From: fchirica Date: Thu, 10 Oct 2019 03:39:39 +0300 Subject: [PATCH 11/13] Get timelord only the latest blocks. 
Free VDF servers --- lib/chiavdf/fast_vdf/vdf.cpp | 18 +++++++++++++++ src/timelord.py | 44 +++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index d52aaedc..8a2b8217 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -685,6 +685,20 @@ void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallba boost::asio::write(sock, boost::asio::buffer(str_result.c_str(), str_result.size())); } +void PollTimelord(tcp::socket& sock, bool& got_iters) { + // Wait for 60s, if no iters come, poll each 15 seconds the timelord. + int seconds = 0; + while (!got_iters) { + std::this_thread::sleep_for (std::chrono::seconds(1)); + seconds++; + if (seconds >= 60 && (seconds - 60) % 15 == 0) { + socket_mutex.lock(); + boost::asio::write(sock, boost::asio::buffer("POLL", 4)); + socket_mutex.unlock(); + } + } +} + const int max_length = 2048; void session(tcp::socket sock) { @@ -745,7 +759,9 @@ void session(tcp::socket sock) { weso.kl = 10; bool stopped = false; + bool got_iters = false; std::thread vdf_worker(repeated_square, f, D, L, std::ref(weso), std::ref(stopped)); + std::thread poll_thread(PollTimelord, std::ref(sock), std::ref(got_iters)); // Tell client that I'm ready to get the challenges. boost::asio::write(sock, boost::asio::buffer("OK", 2)); @@ -759,6 +775,7 @@ void session(tcp::socket sock) { boost::asio::read(sock, boost::asio::buffer(data, size), error); int iters = atoi(data); std::cout << "Got iterations " << iters << "\n"; + got_iters = true; if (seen_iterations.size() > 0 && *seen_iterations.begin() <= iters) { std::cout << "Ignoring..." 
<< iters << "\n"; continue; @@ -771,6 +788,7 @@ void session(tcp::socket sock) { if (iters == 0) { stopped = true; + poll_thread.join(); for (int t = 0; t < threads.size(); t++) { threads[t].join(); } diff --git a/src/timelord.py b/src/timelord.py index 67e355d5..a5fb450b 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -21,14 +21,17 @@ from src.server.outbound_message import OutboundMessage, Delivery, Message, Node class Database: lock: Lock = Lock() free_servers = [] - solved_discriminants = [] active_discriminants: Dict = {} done_discriminants = [] + seen_discriminants = [] + counter = 0 + active_counters = [] log = logging.getLogger(__name__) config = yaml.safe_load(open("src/config/timelord.yaml", "r")) db = Database() db.free_servers.append(8889) +db.free_servers.append(8890) @api_request async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): @@ -42,18 +45,30 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): disc: int = create_discriminant(challenge_start.challenge_hash, constants["DISCRIMINANT_SIZE_BITS"]) async with db.lock: - if challenge_start.challenge_hash in db.done_discriminants: - log.info("This discriminant was already done..") + if (challenge_start.challenge_hash in db.seen_discriminants): + log.info("Already seen this one... Ignoring") return + db.seen_discriminants.append(challenge_start.challenge_hash) + db.counter += 1 + current_counter = db.counter + db.active_counters.append(db.counter) #Wait for a server to become free. 
port = None while (port is None): async with db.lock: - if (len(db.free_servers) != 0): - port = db.free_servers[0] - db.free_servers = db.free_servers[1:] - log.info(f"Discriminant {disc} attached to port {port}.") + if (current_counter == max(db.active_counters)): + if (len(db.free_servers) != 0): + port = db.free_servers[0] + db.free_servers = db.free_servers[1:] + log.info(f"Discriminant {disc} attached to port {port}.") + db.active_counters.remove(current_counter) + break + #This is way too far... Stop polling the server. + if (current_counter < db.counter - 10): + db.active_counters.remove(current_counter) + db.done_discriminants.append(challenge_start.challenge_hash) + return #Poll until a server becomes free. if (port is None): await asyncio.sleep(3) @@ -86,6 +101,15 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): await writer.drain() db.free_servers.append(port) break + elif (data.decode() == "POLL"): + async with db.lock: + # If I have a newer discriminant... Free up the VDF server + if (current_counter < max(db.active_counters)): + log.info("Got poll, stopping the challenge!") + writer.write(b'10') + await writer.drain() + del db.active_discriminants[challenge_start.challenge_hash] + db.done_discriminants.append(challenge_start.challenge_hash) else: try: #This must be a proof, read the continuation. @@ -107,11 +131,6 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): response = timelord_protocol.ProofOfTimeFinished(proof_of_time) log.info(f"Got PoT for challenge {challenge_start.challenge_hash}") - #async with db.lock: - # if (challenge_start.challenge_hash in db.solved_discriminants): - # log.info("I've already propagated one proof... 
Ignoring for now...") - # continue - # db.solved_discriminants.append(challenge_start.challenge_hash) yield OutboundMessage(NodeType.FULL_NODE, Message("proof_of_time_finished", response), Delivery.RESPOND) except Exception as e: e_to_str = str(e) @@ -152,6 +171,5 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac await writer.drain() return if (proof_of_space_info.challenge_hash in db.done_discriminants): - log.info("Got iters for a finished challenge") return await asyncio.sleep(0.5) From dad8bb55e84026a411d7f5088c7a77b729f14cfa Mon Sep 17 00:00:00 2001 From: Mariano Sorgente Date: Thu, 10 Oct 2019 13:10:18 +0900 Subject: [PATCH 12/13] .gitignore changes --- .gitignore | 8 +++++++- lib/chiavdf/fast_vdf/install_child.sh | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index dc2cb822..7a6fc464 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,13 @@ __pycache__/ # C extensions *.so **/*.o -.DS_Store +**/*.DS_Store + +# VDF executables +lib/chiavdf/fast_vdf/compile_asm +lib/chiavdf/fast_vdf/vdf +# Flint dependency +lib/chiavdf/fast_vdf/flint # PyInstaller # Usually these files are written by a python script from a template diff --git a/lib/chiavdf/fast_vdf/install_child.sh b/lib/chiavdf/fast_vdf/install_child.sh index 551170f2..0e036aa6 100755 --- a/lib/chiavdf/fast_vdf/install_child.sh +++ b/lib/chiavdf/fast_vdf/install_child.sh @@ -20,4 +20,4 @@ g++ -o compile_asm compile_asm.o $link_flags ./compile_asm as -o asm_compiled.o asm_compiled.s g++ -o vdf.o -c vdf.cpp $compile_flags -O3 -g++ -o vdf vdf.o asm_compiled.o $link_flags \ No newline at end of file +g++ -o vdf vdf.o asm_compiled.o $link_flags From 5247032828e6b5ccbb0dc79244f7ff9ce4499f35 Mon Sep 17 00:00:00 2001 From: fchirica Date: Fri, 11 Oct 2019 01:16:41 +0300 Subject: [PATCH 13/13] Improved timelord logic --- lib/chiavdf/fast_vdf/vdf.cpp | 14 +++++-- src/blockchain.py | 1 - src/full_node.py | 16 ++++---- 
src/protocols/timelord_protocol.py | 4 +- src/timelord.py | 65 +++++++++++++++++++----------- 5 files changed, 62 insertions(+), 38 deletions(-) diff --git a/lib/chiavdf/fast_vdf/vdf.cpp b/lib/chiavdf/fast_vdf/vdf.cpp index 8675386c..255834ac 100644 --- a/lib/chiavdf/fast_vdf/vdf.cpp +++ b/lib/chiavdf/fast_vdf/vdf.cpp @@ -686,12 +686,12 @@ void NWesolowskiMain(integer D, form x, int64_t num_iterations, WesolowskiCallba } void PollTimelord(tcp::socket& sock, bool& got_iters) { - // Wait for 60s, if no iters come, poll each 15 seconds the timelord. + // Wait for 15s, if no iters come, poll each 5 seconds the timelord. int seconds = 0; while (!got_iters) { std::this_thread::sleep_for (std::chrono::seconds(1)); seconds++; - if (seconds >= 60 && (seconds - 60) % 15 == 0) { + if (seconds >= 15 && (seconds - 15) % 5 == 0) { socket_mutex.lock(); boost::asio::write(sock, boost::asio::buffer("POLL", 4)); socket_mutex.unlock(); @@ -801,10 +801,18 @@ void session(tcp::socket sock) { } } } + } catch (std::exception& e) { + std::cerr << "Exception in thread: " << e.what() << "\n"; + } + + try { // Tell client I've stopped everything, wait for ACK and close. + boost::system::error_code error; + + std::cout << "Stopped everything! Ready for the next challenge.\n"; + std::lock_guard lock(socket_mutex); boost::asio::write(sock, boost::asio::buffer("STOP", 4)); - std::cout << "Stopped everything! Ready for the next challenge.\n"; char ack[5]; memset(ack,0x00,sizeof(ack)); diff --git a/src/blockchain.py b/src/blockchain.py index b06411b8..f8c63f8c 100644 --- a/src/blockchain.py +++ b/src/blockchain.py @@ -361,7 +361,6 @@ class Blockchain: return False # 4. Check PoT - # TODO(Florin): Change the hardcode of the genesis with new pot format. 
if not block.trunk_block.proof_of_time.is_valid(self.constants["DISCRIMINANT_SIZE_BITS"]): return False diff --git a/src/full_node.py b/src/full_node.py index 7361395a..b14e66c5 100644 --- a/src/full_node.py +++ b/src/full_node.py @@ -100,7 +100,7 @@ async def send_challenges_to_timelords() -> AsyncGenerator[OutboundMessage, None async with db.lock: for head in db.blockchain.get_current_heads(): challenge_hash = head.challenge.get_hash() - requests.append(timelord_protocol.ChallengeStart(challenge_hash)) + requests.append(timelord_protocol.ChallengeStart(challenge_hash, head.challenge.height)) for request in requests: yield OutboundMessage(NodeType.TIMELORD, Message("challenge_start", request), Delivery.BROADCAST) @@ -608,8 +608,9 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N block.block.trunk_block.challenge.height, pos_quality, difficulty) - timelord_request = timelord_protocol.ChallengeStart(block.block.trunk_block.challenge.get_hash()) - timelord_request_end = timelord_protocol.ChallengeStart(block.block.trunk_block.proof_of_time. + timelord_request = timelord_protocol.ChallengeStart(block.block.trunk_block.challenge.get_hash(), + block.block.trunk_block.challenge.height) + timelord_request_end = timelord_protocol.ChallengeEnd(block.block.trunk_block.proof_of_time. output.challenge_hash) # Tell timelord to stop previous challenge and start with new one yield OutboundMessage(NodeType.TIMELORD, Message("challenge_end", timelord_request_end), Delivery.BROADCAST) @@ -620,10 +621,9 @@ async def block(block: peer_protocol.Block) -> AsyncGenerator[OutboundMessage, N # Tell farmer about the new block yield OutboundMessage(NodeType.FARMER, Message("proof_of_space_finalized", farmer_request), Delivery.BROADCAST) - else: - # Note(Florin): This is a hack... 
- log.info("I've received a block, stopping the challenge to free up the VDF server...") - log.info(f"Height of received block = {block.block.trunk_block.challenge.height}") - timelord_request_end = timelord_protocol.ChallengeStart(block.block.trunk_block.proof_of_time. + elif added == ReceiveBlockResult.ADDED_AS_ORPHAN: + log.info("I've received an orphan, stopping the proof of time challenge.") + log.info(f"Height of the orphan block is {block.block.trunk_block.challenge.height}") + timelord_request_end = timelord_protocol.ChallengeEnd(block.block.trunk_block.proof_of_time. output.challenge_hash) yield OutboundMessage(NodeType.TIMELORD, Message("challenge_end", timelord_request_end), Delivery.BROADCAST) diff --git a/src/protocols/timelord_protocol.py b/src/protocols/timelord_protocol.py index 81e76584..f61ddc5e 100644 --- a/src/protocols/timelord_protocol.py +++ b/src/protocols/timelord_protocol.py @@ -1,6 +1,6 @@ from src.util.cbor_message import cbor_message from src.types.sized_bytes import bytes32 -from src.util.ints import uint64 +from src.util.ints import uint32, uint64 from src.types.proof_of_time import ProofOfTime """ @@ -20,7 +20,7 @@ class ProofOfTimeFinished: @cbor_message(tag=3001) class ChallengeStart: challenge_hash: bytes32 - + height: uint32 @cbor_message(tag=3002) class ChallengeEnd: diff --git a/src/timelord.py b/src/timelord.py index c2e11303..3a08577e 100644 --- a/src/timelord.py +++ b/src/timelord.py @@ -21,10 +21,11 @@ class Database: lock: Lock = Lock() free_servers: List[int] = [] active_discriminants: Dict = {} + pending_iters: Dict = {} + best_height = 0 done_discriminants = [] seen_discriminants = [] - counter = 0 - active_counters = [] + active_heights = [] log = logging.getLogger(__name__) @@ -43,27 +44,35 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): """ disc: int = create_discriminant(challenge_start.challenge_hash, constants["DISCRIMINANT_SIZE_BITS"]) - async with db.lock: if 
(challenge_start.challenge_hash in db.seen_discriminants): log.info("Already seen this one... Ignoring") return db.seen_discriminants.append(challenge_start.challenge_hash) - db.counter += 1 - current_counter = db.counter - db.active_counters.append(db.counter) + db.active_heights.append(challenge_start.height) + db.best_height = max(db.best_height, challenge_start.height) # Wait for a server to become free. port: int = -1 while port == -1: async with db.lock: - if (len(db.free_servers) != 0): - port = db.free_servers[0] - db.free_servers = db.free_servers[1:] - log.info(f"Discriminant {disc} attached to port {port}.") + if (challenge_start.height <= db.best_height - 5): + db.done_discriminants.append(challenge_start.challenge_hash) + db.active_heights.remove(challenge_start.height) + log.info(f"Stopping challenge at height {challenge_start.height}") + return + assert(len(db.active_heights) > 0) + if (challenge_start.height == max(db.active_heights)): + if (len(db.free_servers) != 0): + port = db.free_servers[0] + db.free_servers = db.free_servers[1:] + log.info(f"Discriminant {disc} attached to port {port}.") + log.info(f"Height attached is {challenge_start.height}") + db.active_heights.remove(challenge_start.height) + # Poll until a server becomes free. if port == -1: - await asyncio.sleep(3) + await asyncio.sleep(0.1) # TODO(Florin): Handle connection failure (attempt another server) try: @@ -83,6 +92,13 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): async with db.lock: db.active_discriminants[challenge_start.challenge_hash] = writer + async with db.lock: + if (challenge_start.challenge_hash in db.pending_iters): + for iter in db.pending_iters[challenge_start.challenge_hash]: + writer.write((str(len(str(iter))) + + str(iter)).encode()) + await writer.drain() + # Listen to the server until "STOP" is received. 
while True: data = await reader.readexactly(4) @@ -96,7 +112,7 @@ async def challenge_start(challenge_start: timelord_protocol.ChallengeStart): elif (data.decode() == "POLL"): async with db.lock: # If I have a newer discriminant... Free up the VDF server - if (current_counter < max(db.active_counters)): + if (challenge_start.height < max(db.active_heights)): log.info("Got poll, stopping the challenge!") writer.write(b'10') await writer.drain() @@ -156,15 +172,16 @@ async def proof_of_space_info(proof_of_space_info: timelord_protocol.ProofOfSpac many iterations to run for. """ - while True: - async with db.lock: - if (proof_of_space_info.challenge_hash in db.active_discriminants): - writer = db.active_discriminants[proof_of_space_info.challenge_hash] - writer.write((str(len(str(proof_of_space_info.iterations_needed))) - + str(proof_of_space_info.iterations_needed)).encode()) - await writer.drain() - return - if (proof_of_space_info.challenge_hash in db.done_discriminants): - log.info("Got iters for a finished challenge") - return - await asyncio.sleep(0.5) + async with db.lock: + if (proof_of_space_info.challenge_hash in db.active_discriminants): + writer = db.active_discriminants[proof_of_space_info.challenge_hash] + writer.write((str(len(str(proof_of_space_info.iterations_needed))) + + str(proof_of_space_info.iterations_needed)).encode()) + await writer.drain() + return + if (proof_of_space_info.challenge_hash in db.done_discriminants): + return + if (proof_of_space_info.challenge_hash not in db.pending_iters): + db.pending_iters[proof_of_space_info.challenge_hash] = [] + db.pending_iters[proof_of_space_info.challenge_hash].append(proof_of_space_info.iterations_needed) +